Skip to content
This repository has been archived by the owner on Feb 7, 2024. It is now read-only.

Commit

Permalink
Merge pull request #88 from Jabb0/master
Browse files Browse the repository at this point in the history
Made the download aware of the actual returned batch size when using limit
  • Loading branch information
dmarx authored Jan 31, 2021
2 parents 200ed20 + f9460f7 commit 53df79d
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions psaw/PushshiftAPI.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,18 @@ def _handle_paging(self, url):
raise NotImplementedError(err_msg.format(self.max_results_per_request))
self._add_nec_args(self.payload)

yield self._get(url, self.payload)

if (limit is not None) & (limit == 0):
return
data = self._get(url, self.payload)
yield data
if limit is not None:
received_size = int(data['metadata']['size'])
requested_size = self.payload['limit']
# The API may return fewer results than were requested.
# When that happens, add the shortfall back to the limit so that a further request asks for the missing amount.
if received_size < requested_size:
limit += requested_size - received_size

if limit == 0:
return

def _search(self,
kind,
Expand Down

0 comments on commit 53df79d

Please sign in to comment.