Skip to content

Commit

Permalink
Add process_spider_output_async() to the spider middleware.
Browse files Browse the repository at this point in the history
  • Loading branch information
wRAR committed Dec 27, 2024
1 parent 3a18d1d commit b96b375
Showing 1 changed file with 15 additions and 3 deletions.
18 changes: 15 additions & 3 deletions sh_scrapy/middlewares.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
import itertools
from warnings import warn
from weakref import WeakKeyDictionary

from scrapy import Request
Expand Down Expand Up @@ -28,11 +29,22 @@ def process_spider_output(self, response, result, spider):
parent = self._seen_requests.pop(response.request, None)
for x in result:
if isinstance(x, Request):
x.meta[HS_PARENT_ID_KEY] = parent
# Remove request id if it was for some reason set in the request coming from Spider.
x.meta.pop(HS_REQUEST_ID_KEY, None)
self._process_request(x, parent)
yield x

async def process_spider_output_async(self, response, result, spider):
    """Asynchronous variant of the spider-output hook.

    Looks up (and forgets) the entry stored in ``self._seen_requests`` for
    the request that produced *response*, then re-yields every element of
    the asynchronous iterable *result*. Elements that are ``Request``
    instances are passed through ``self._process_request`` together with
    that entry before being yielded; all other elements pass through
    untouched.
    """
    # ``None`` is used when the originating request was never recorded.
    parent_id = self._seen_requests.pop(response.request, None)
    async for entry in result:
        if isinstance(entry, Request):
            self._process_request(entry, parent_id)
        yield entry


def _process_request(self, request, parent):
    """Record *parent* in the request's meta and drop any stale request id.

    Stores *parent* under ``HS_PARENT_ID_KEY`` in ``request.meta`` and
    removes ``HS_REQUEST_ID_KEY`` from it if present.
    """
    meta = request.meta
    meta[HS_PARENT_ID_KEY] = parent
    # A request coming from the spider may already carry a request id for
    # some reason; discard it.
    meta.pop(HS_REQUEST_ID_KEY, None)


class HubstorageDownloaderMiddleware:
"""Hubstorage downloader middleware.
Expand Down

0 comments on commit b96b375

Please sign in to comment.