Skip to content

Commit b3637b6

Browse files
authored
fix: call failed_request_handler for SessionError when session rotation count exceeds maximum (#1147)
### Description

- Call `failed_request_handler` for `SessionError` when the session rotation count exceeds the maximum
1 parent 7114e96 commit b3637b6

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed

src/crawlee/crawlers/_basic/_basic_crawler.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -1164,8 +1164,6 @@ async def __run_task_function(self) -> None:
11641164
await request_manager.reclaim_request(request)
11651165
await self._statistics.error_tracker_retry.add(error=session_error, context=context)
11661166
else:
1167-
self._logger.exception('Request failed and reached maximum retries', exc_info=session_error)
1168-
11691167
await wait_for(
11701168
lambda: request_manager.mark_request_as_handled(context.request),
11711169
timeout=self._internal_timeout,
@@ -1175,8 +1173,8 @@ async def __run_task_function(self) -> None:
11751173
max_retries=3,
11761174
)
11771175

1176+
await self._handle_failed_request(context, session_error)
11781177
self._statistics.record_request_processing_failure(statistics_id)
1179-
await self._statistics.error_tracker.add(error=session_error, context=context)
11801178

11811179
except ContextPipelineInterruptedError as interrupted_error:
11821180
self._logger.debug('The context pipeline was interrupted', exc_info=interrupted_error)

tests/unit/crawlers/_basic/test_basic_crawler.py

+19
Original file line numberDiff line numberDiff line change
@@ -1291,3 +1291,22 @@ async def error_req_hook(context: BasicCrawlingContext, error: Exception) -> Non
12911291
await crawler.run(requests)
12921292

12931293
assert error_handler_mock.call_count == 1
1294+
1295+
1296+
async def test_handles_session_error_in_failed_request_handler() -> None:
1297+
crawler = BasicCrawler(max_session_rotations=1)
1298+
handler_requests = set()
1299+
1300+
@crawler.router.default_handler
1301+
async def handler(context: BasicCrawlingContext) -> None:
1302+
raise SessionError('blocked')
1303+
1304+
@crawler.failed_request_handler
1305+
async def failed_request_handler(context: BasicCrawlingContext, error: Exception) -> None:
1306+
handler_requests.add(context.request.url)
1307+
1308+
requests = ['http://a.com/', 'http://b.com/', 'http://c.com/']
1309+
1310+
await crawler.run(requests)
1311+
1312+
assert set(requests) == handler_requests

0 commit comments

Comments (0)