Skip to content

Commit

Permalink
Fix the script
Browse files Browse the repository at this point in the history
  • Loading branch information
obulat committed Aug 24, 2023
1 parent e62a452 commit 70c2bc1
Showing 1 changed file with 16 additions and 4 deletions.
20 changes: 16 additions & 4 deletions catalog/dags/providers/provider_api_scripts/justtakeitfree.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dic
# On the first request, `prev_query_params` will be `None`. We can detect this
# and return our default params.
if not prev_query_params:
return {"page": 0, "key": Variable.get("API_KEY_JUSTTAKEITFREE")}
return {"page": 1, "key": Variable.get("API_KEY_JUSTTAKEITFREE")}
else:
return {
**prev_query_params,
Expand All @@ -40,13 +40,14 @@ def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dic

def get_batch_data(self, response_json):
if response_json and (data := response_json.get("data")):
return data[0]
return data
return None

def get_media_type(self, record: dict):
    """Report the media type for ``record``.

    The record itself is not inspected: the ``IMAGE`` constant (defined
    outside this excerpt) is returned for every record.
    """
    return IMAGE

def get_record_data(self, data: dict) -> dict | list[dict] | None:
def get_record_data(self, data: list[dict]) -> dict | None:
data = data[0]
if not (foreign_identifier := data.get("page_link", "").split("/")[-2]):
logger.debug("Skipping record with missing foreign_identifier")
return None
Expand All @@ -71,7 +72,9 @@ def get_record_data(self, data: dict) -> dict | list[dict] | None:
creator_url = "https://justtakeitfree.com"
raw_tags = data.get("tags")

return {
filesize = self.get_file_info(url)

record_data = {
"foreign_landing_url": foreign_landing_url,
"url": url,
"license_info": license_info,
Expand All @@ -80,7 +83,16 @@ def get_record_data(self, data: dict) -> dict | list[dict] | None:
"creator": creator,
"creator_url": creator_url,
"raw_tags": raw_tags,
"filesize": filesize,
}
return record_data

def get_file_info(self, url):
    """Get the image size in bytes, or ``None`` when it cannot be determined.

    Sends a HEAD request for ``url`` through ``self.delayed_requester`` and
    reads the ``Content-Length`` response header.

    :param url: URL of the file to probe.
    :return: the non-zero integer value of ``Content-Length``; ``None`` when
        the requester returns a falsy response, or when the header is
        missing, zero, or not parseable as an integer.
    """
    resp = self.delayed_requester.head(url)
    if not resp:
        return None
    try:
        filesize = int(resp.headers.get("Content-Length", 0))
    except (TypeError, ValueError):
        # A malformed header should not abort processing of the record;
        # report the size as unknown instead.
        return None
    return filesize if filesize != 0 else None


def main():
Expand Down

0 comments on commit 70c2bc1

Please sign in to comment.