diff --git a/sh_scrapy/middlewares.py b/sh_scrapy/middlewares.py index 8c7d586..476526b 100644 --- a/sh_scrapy/middlewares.py +++ b/sh_scrapy/middlewares.py @@ -98,7 +98,7 @@ def process_response(self, request, response, spider): rs=len(response.body), duration=request.meta.get('download_latency', 0) * 1000, parent=request.meta.setdefault(HS_PARENT_ID_KEY), - fp=self._fingerprint(response.request), + fp=self._fingerprint(request), ) # Generate and set request id. request_id = next(self.request_id_sequence) diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index 9144817..f6d36ad 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -48,13 +48,13 @@ def test_hs_middlewares(hs_downloader_middleware, hs_spider_middleware): assert len(hs_spider_middleware._seen_requests) == 0 assert len(hs_downloader_middleware._seen_requests) == 0 - response_0.request = request_0 hs_downloader_middleware.process_response(request_0, response_0, spider) assert request_0.meta[HS_REQUEST_ID_KEY] == 0 assert request_0.meta[HS_PARENT_ID_KEY] is None assert hs_spider_middleware._seen_requests[request_0] == 0 + response_0.request = request_0 request_1 = Request(url) request_2 = Request(url) item1 = {} @@ -71,14 +71,12 @@ def test_hs_middlewares(hs_downloader_middleware, hs_spider_middleware): response_1 = Response(url) hs_downloader_middleware.process_request(request_1, spider) - response_1.request = request_1 hs_downloader_middleware.process_response(request_1, response_1, spider) assert request_1.meta[HS_REQUEST_ID_KEY] == 1 assert request_1.meta[HS_PARENT_ID_KEY] == 0 response_2 = Response(url) hs_downloader_middleware.process_request(request_2, spider) - response_2.request = request_2 hs_downloader_middleware.process_response(request_2, response_2, spider) assert request_2.meta[HS_REQUEST_ID_KEY] == 2 assert request_2.meta[HS_PARENT_ID_KEY] == 0 @@ -105,14 +103,12 @@ def __init__(self, url: str, request: Optional[Request] = None): response_1 = DummyResponse(url, request) response_2 = Response(url) hs_downloader_middleware.process_request(request, spider) - response_1.request = request hs_downloader_middleware.process_response(request, response_1, spider) with open(hs_downloader_middleware.pipe_writer.path, 'r') as tmp_file: assert tmp_file.readline() == "" assert request.meta == {} - response_2.request = request hs_downloader_middleware.process_response(request, response_2, spider) with open(hs_downloader_middleware.pipe_writer.path, 'r') as tmp_file: assert tmp_file.readline().startswith('REQ') @@ -144,7 +140,6 @@ def __init__(self, url: str, request: Optional[Request] = None): assert len(hs_spider_middleware._seen_requests) == 0 assert len(hs_downloader_middleware._seen_requests) == 0 - response_0.request = request_0 hs_downloader_middleware.process_response(request_0, response_0, spider) assert request_0.meta[HS_REQUEST_ID_KEY] == 0 @@ -161,7 +156,6 @@ def __init__(self, url: str, request: Optional[Request] = None): assert HS_REQUEST_ID_KEY not in request_1.meta assert request_1.meta[HS_PARENT_ID_KEY] == 0 - response_1.request = request_1 hs_downloader_middleware.process_response(request_1, response_1, spider) assert request_1.meta[HS_REQUEST_ID_KEY] == 1 @@ -171,13 +165,11 @@ def __init__(self, url: str, request: Optional[Request] = None): response_2_1 = DummyResponse(url, request_2) response_2_2 = Response(url) - response_2_1.request = request_2 hs_downloader_middleware.process_response(request_2, response_2_1, spider) assert request_2.meta[HS_REQUEST_ID_KEY] == 1 assert request_2.meta[HS_PARENT_ID_KEY] == 0 - response_2_2.request = request_2 hs_downloader_middleware.process_response(request_2, response_2_2, spider) assert request_2.meta[HS_REQUEST_ID_KEY] == 2