Skip to content
This repository has been archived by the owner on Feb 7, 2024. It is now read-only.

Commit

Permalink
Made the download aware of the actual returned batch size
Browse files: browse the repository at this point in the history
  • Loading branch information
Jabb0 committed Nov 28, 2020
1 parent 200ed20 commit acb4a8a
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions psaw/PushshiftAPI.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,15 @@ def _handle_paging(self, url):
raise NotImplementedError(err_msg.format(self.max_results_per_request))
self._add_nec_args(self.payload)

yield self._get(url, self.payload)

if (limit is not None) & (limit == 0):
data = self._get(url, self.payload)
yield data
received_size = int(data['metadata']['size'])
requested_size = self.payload['limit']
# The API may return fewer results than requested. In that case another request is needed, so add the shortfall back to `limit`.
if received_size < requested_size:
limit += requested_size - received_size

if (limit is not None) and (limit == 0):
return

def _search(self,
Expand Down

0 comments on commit acb4a8a

Please sign in to comment.