Skip to content

Commit 8c25fc0

Browse files
committed
solve
2 parents e6d7a22 + e87d08e commit 8c25fc0

File tree

5 files changed

+37
-0
lines changed

5 files changed

+37
-0
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@ All notable changes to this project will be documented in this file.
1111
- Add `block_requests` helper for `PlaywrightCrawler` ([#919](https://github.com/apify/crawlee-python/pull/919)) ([1030459](https://github.com/apify/crawlee-python/commit/103045994908f80cffee5ccfff91a040e0042f48)) by [@Mantisus](https://github.com/Mantisus), closes [#848](https://github.com/apify/crawlee-python/issues/848)
1212
- Return request handlers from decorator methods to allow further decoration ([#934](https://github.com/apify/crawlee-python/pull/934)) ([9ec0aae](https://github.com/apify/crawlee-python/commit/9ec0aae54e2a340d29c893567ae80bf8bd4510a9)) by [@mylank](https://github.com/mylank)
1313
- Add `transform_request_function` for `enqueue_links` ([#923](https://github.com/apify/crawlee-python/pull/923)) ([6b15957](https://github.com/apify/crawlee-python/commit/6b159578f612251e6d2253a72b6521430f4f9b09)) by [@Mantisus](https://github.com/Mantisus), closes [#894](https://github.com/apify/crawlee-python/issues/894)
14+
- Add `time_remaining_secs` property to `MIGRATING` event data ([#940](https://github.com/apify/crawlee-python/pull/940)) ([b44501b](https://github.com/apify/crawlee-python/commit/b44501bcadbd12673a8f47aa92f12da8e404f60b)) by [@fnesveda](https://github.com/fnesveda)
1415

1516
### 🐛 Bug Fixes
1617

1718
- Fix crawler not retrying user handler if there was timeout in the handler ([#909](https://github.com/apify/crawlee-python/pull/909)) ([f4090ef](https://github.com/apify/crawlee-python/commit/f4090ef0ea0281d53dab16a77ceea2ef6ac43d76)) by [@Pijukatel](https://github.com/Pijukatel), closes [#907](https://github.com/apify/crawlee-python/issues/907)
1819
- Optimize memory consumption for `HttpxHttpClient`, fix proxy handling ([#905](https://github.com/apify/crawlee-python/pull/905)) ([d7ad480](https://github.com/apify/crawlee-python/commit/d7ad480834263ae0480049cb0a8db4dfc3946d8d)) by [@Mantisus](https://github.com/Mantisus), closes [#895](https://github.com/apify/crawlee-python/issues/895)
20+
- Fix `BrowserPool` and `PlaywrightBrowserPlugin` closure ([#932](https://github.com/apify/crawlee-python/pull/932)) ([997543d](https://github.com/apify/crawlee-python/commit/997543d2fa5afba49929f4407ee95d7a4933a50d)) by [@Mantisus](https://github.com/Mantisus)
1921

2022

2123
<!-- git-cliff-unreleased-end -->

src/crawlee/_utils/models.py

+27
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@ def _timedelta_to_ms(td: timedelta | None) -> float | None:
1717
return int(round(td.total_seconds() * 1000))
1818

1919

20+
def _timedelta_to_secs(td: timedelta | None) -> float | None:
21+
if td == timedelta.max:
22+
return float('inf')
23+
if td is None:
24+
return td
25+
return td.total_seconds()
26+
27+
2028
# Shared adapter used by the timedelta validators in this module to decode string-encoded numbers into floats.
_number_parser = TypeAdapter(float)
2129

2230

@@ -35,4 +43,23 @@ def _timedelta_from_ms(value: float | timedelta | Any | None, handler: Callable[
3543
return timedelta(milliseconds=value)
3644

3745

46+
def _timedelta_from_secs(
47+
value: float | timedelta | Any | None,
48+
handler: Callable[[Any], timedelta],
49+
) -> timedelta | None:
50+
if value == float('inf'):
51+
return timedelta.max
52+
53+
# If the value is a string-encoded number, decode it
54+
if isinstance(value, str):
55+
with suppress(ValidationError):
56+
value = _number_parser.validate_python(value)
57+
58+
if not isinstance(value, (int, float)):
59+
return handler(value)
60+
61+
return timedelta(seconds=value)
62+
63+
3864
# `timedelta` field type that is serialized with `_timedelta_to_ms` and validated with `_timedelta_from_ms`.
timedelta_ms = Annotated[timedelta, PlainSerializer(_timedelta_to_ms), WrapValidator(_timedelta_from_ms)]
65+
# `timedelta` field type that is serialized with `_timedelta_to_secs` and validated with `_timedelta_from_secs`.
timedelta_secs = Annotated[timedelta, PlainSerializer(_timedelta_to_secs), WrapValidator(_timedelta_from_secs)]

src/crawlee/browsers/_browser_pool.py

+2
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ async def __aexit__(
204204

205205
for browser in self._active_browsers + self._inactive_browsers:
206206
await browser.close(force=True)
207+
self._active_browsers.clear()
208+
self._inactive_browsers.clear()
207209

208210
for plugin in self._plugins:
209211
await plugin.__aexit__(exc_type, exc_value, exc_traceback)

src/crawlee/browsers/_playwright_browser_plugin.py

+1
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ async def __aexit__(
135135
raise RuntimeError(f'The {self.__class__.__name__} is not active.')
136136

137137
await self._playwright_context_manager.__aexit__(exc_type, exc_value, exc_traceback)
138+
self._playwright_context_manager = async_playwright()
138139
self._active = False
139140

140141
@override

src/crawlee/events/_types.py

+5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from pydantic import BaseModel, ConfigDict, Field
88

99
from crawlee._utils.docs import docs_group
10+
from crawlee._utils.models import timedelta_secs
1011
from crawlee._utils.system import CpuInfo, MemoryUsageInfo
1112

1213

@@ -59,6 +60,10 @@ class EventMigratingData(BaseModel):
5960

6061
model_config = ConfigDict(populate_by_name=True)
6162

63+
# The remaining time in seconds before the migration is forced and the process is killed
64+
# Optional because it's not present when the event handler is called manually
65+
time_remaining: Annotated[timedelta_secs | None, Field(alias='timeRemainingSecs')] = None
66+
6267

6368
@docs_group('Event payloads')
6469
class EventAbortingData(BaseModel):

0 commit comments

Comments
 (0)