From 96fa262619445cb75356c6fa0af7a14242ea3063 Mon Sep 17 00:00:00 2001 From: Olga Bulat Date: Sun, 23 Apr 2023 10:18:25 +0300 Subject: [PATCH] Replace `media_url` with `url` in provider scripts --- catalog/dags/common/storage/audio.py | 9 +-- catalog/dags/common/storage/image.py | 9 +-- catalog/dags/common/storage/media.py | 6 +- catalog/dags/common/tsv_cleaner.py | 2 +- .../provider_api_scripts/brooklyn_museum.py | 2 +- .../provider_api_scripts/cleveland_museum.py | 2 +- .../provider_api_scripts/europeana.py | 2 +- .../provider_api_scripts/finnish_museums.py | 2 +- .../providers/provider_api_scripts/flickr.py | 4 +- .../provider_api_scripts/freesound.py | 6 +- .../providers/provider_api_scripts/jamendo.py | 10 ++-- .../metropolitan_museum.py | 2 +- .../provider_api_scripts/museum_victoria.py | 2 +- .../providers/provider_api_scripts/nappy.py | 2 +- .../providers/provider_api_scripts/nypl.py | 18 +++--- .../provider_api_scripts/phylopic.py | 2 +- .../provider_api_scripts/rawpixel.py | 2 +- .../provider_api_scripts/science_museum.py | 22 ++++---- .../provider_api_scripts/smithsonian.py | 2 +- .../providers/provider_api_scripts/smk.py | 2 +- .../provider_api_scripts/stocksnap.py | 2 +- .../provider_api_scripts/wikimedia_commons.py | 4 +- .../provider_api_scripts/wordpress.py | 2 +- catalog/docs/data_models.md | 12 ++-- .../templates/template_provider.py_template | 14 ++--- .../tests/dags/common/storage/test_audio.py | 26 ++++----- .../tests/dags/common/storage/test_image.py | 13 ++--- .../tests/dags/common/storage/test_media.py | 56 +++++++++---------- catalog/tests/dags/common/test_tsv_cleaner.py | 4 +- .../inaturalist/full_db_response.txt | 2 +- .../sample_additional_image_data.json | 6 +- .../sample_image_data.json | 2 +- .../mock_provider_data_ingester.py | 18 ++---- .../resources/smk/expected_image_data_hq.json | 2 +- .../smk/expected_image_data_legacy.json | 2 +- .../test_brooklyn_museum.py | 2 +- .../provider_api_scripts/test_europeana.py | 4 +- 
.../test_finnish_museums.py | 2 +- .../provider_api_scripts/test_flickr.py | 8 +-- .../provider_api_scripts/test_freesound.py | 4 +- .../provider_api_scripts/test_jamendo.py | 2 +- .../test_metropolitan_museum.py | 2 +- .../test_museum_victoria.py | 4 +- .../provider_api_scripts/test_nappy.py | 2 +- .../provider_api_scripts/test_nypl.py | 2 +- .../provider_api_scripts/test_phylopic.py | 2 +- .../provider_api_scripts/test_rawpixel.py | 2 +- .../test_science_museum.py | 2 +- .../provider_api_scripts/test_smithsonian.py | 6 +- .../provider_api_scripts/test_stocksnap.py | 4 +- .../test_wikimedia_commons.py | 10 ++-- .../provider_api_scripts/test_wordpress.py | 2 +- 52 files changed, 156 insertions(+), 177 deletions(-) diff --git a/catalog/dags/common/storage/audio.py b/catalog/dags/common/storage/audio.py index b4bcb71ca7f..8d8947f843e 100644 --- a/catalog/dags/common/storage/audio.py +++ b/catalog/dags/common/storage/audio.py @@ -44,7 +44,7 @@ def __init__( def add_item( self, foreign_landing_url: str, - audio_url: str, + url: str, license_info: LicenseInfo, thumbnail_url: str | None = None, filesize: int | None = None, @@ -78,7 +78,7 @@ def add_item( foreign_landing_url: URL of page where the audio lives on the source website. 
- audio_url: Direct link to the audio file + url: Direct link to the audio file license_info: LicenseInfo object that has - the URL of the license for the audio, - string representation of the license, @@ -154,7 +154,7 @@ def add_item( audio_data = { "foreign_landing_url": foreign_landing_url, - "audio_url": audio_url, + "url": url, "license_info": license_info, "thumbnail_url": thumbnail_url, "filesize": filesize, @@ -188,9 +188,6 @@ def _get_audio(self, **kwargs) -> Audio | None: audio_metadata = self.clean_media_metadata(**kwargs) if audio_metadata is None: return None - # Convert the `audio_url` key used in AudioStore, TSV and - # provider API scripts into `url` key used in db - audio_metadata["url"] = audio_metadata.pop("audio_url") # Validate that duration does not exceed Postgres int maximum audio_metadata["duration"] = self._validate_integer( audio_metadata.get("duration") diff --git a/catalog/dags/common/storage/image.py b/catalog/dags/common/storage/image.py index 01ad1f14a1f..e32192dcac9 100644 --- a/catalog/dags/common/storage/image.py +++ b/catalog/dags/common/storage/image.py @@ -44,7 +44,7 @@ def __init__( def add_item( self, foreign_landing_url: str, - image_url: str, + url: str, license_info: LicenseInfo, thumbnail_url: str | None = None, filesize: int | None = None, @@ -69,7 +69,7 @@ def add_item( Required Arguments: foreign_landing_url: URL of page where the image lives on the source website. 
- image_url: Direct link to the image file + url: Direct link to the image file license_info: LicenseInfo object that has - the URL of the license for the image, @@ -124,7 +124,7 @@ def add_item( image_data = { "foreign_landing_url": foreign_landing_url, - "image_url": image_url, + "url": url, "thumbnail_url": thumbnail_url, "filesize": filesize, "filetype": filetype, @@ -152,9 +152,6 @@ def _get_image(self, **kwargs) -> Image | None: image_metadata = self.clean_media_metadata(**kwargs) if image_metadata is None: return None - # Convert the `image_url` key used in ImageStore, TSV and - # provider API scripts into `url` key used in db - image_metadata["url"] = image_metadata.pop("image_url") return Image(**image_metadata) diff --git a/catalog/dags/common/storage/media.py b/catalog/dags/common/storage/media.py index 791d4adea08..9f91cccbbdf 100644 --- a/catalog/dags/common/storage/media.py +++ b/catalog/dags/common/storage/media.py @@ -123,13 +123,13 @@ def clean_media_metadata(self, **media_data) -> dict | None: for field in [ "foreign_identifier", "foreign_landing_url", - f"{self.media_type}_url", + "url", ]: if media_data.get(field) is None: raise ValueError(f"Record missing required field: `{field}`") for field in [ - f"{self.media_type}_url", + "url", "foreign_landing_url", "thumbnail_url", "creator_url", @@ -150,7 +150,7 @@ def clean_media_metadata(self, **media_data) -> dict | None: media_data["ingestion_type"] = "provider_api" media_data["filetype"] = self._validate_filetype( - media_data["filetype"], media_data[f"{self.media_type}_url"] + media_data["filetype"], media_data["url"] ) media_data["filesize"] = self._validate_integer(media_data.get("filesize")) diff --git a/catalog/dags/common/tsv_cleaner.py b/catalog/dags/common/tsv_cleaner.py index 03422fc3013..2410bf0fbfa 100644 --- a/catalog/dags/common/tsv_cleaner.py +++ b/catalog/dags/common/tsv_cleaner.py @@ -41,7 +41,7 @@ def _process_row(tsv_row): image_store = _image_store_dict[row_image.provider] 
image_store.add_item( foreign_landing_url=row_image.foreign_landing_url, - image_url=row_image.url, + url=row_image.url, thumbnail_url=row_image.thumbnail_url, license_info=get_license_info( license_url=get_license_url(row_meta_data), diff --git a/catalog/dags/providers/provider_api_scripts/brooklyn_museum.py b/catalog/dags/providers/provider_api_scripts/brooklyn_museum.py index f1eb949807a..0084b6a7b54 100644 --- a/catalog/dags/providers/provider_api_scripts/brooklyn_museum.py +++ b/catalog/dags/providers/provider_api_scripts/brooklyn_museum.py @@ -127,7 +127,7 @@ def _handle_object_data(data, license_url) -> list[dict]: images.append( { "foreign_landing_url": foreign_url, - "image_url": image_url, + "url": image_url, "license_info": license_info, "foreign_identifier": foreign_id, "width": width, diff --git a/catalog/dags/providers/provider_api_scripts/cleveland_museum.py b/catalog/dags/providers/provider_api_scripts/cleveland_museum.py index 0afac3c4bab..c73cc0c16cb 100644 --- a/catalog/dags/providers/provider_api_scripts/cleveland_museum.py +++ b/catalog/dags/providers/provider_api_scripts/cleveland_museum.py @@ -60,7 +60,7 @@ def get_record_data(self, data): "foreign_landing_url": data.get("url"), "title": data.get("title", None), "creator": creator_name, - "image_url": image["url"], + "url": image["url"], "width": self._get_int_value(image, "width"), "height": self._get_int_value(image, "height"), "filesize": self._get_int_value(image, "filesize"), diff --git a/catalog/dags/providers/provider_api_scripts/europeana.py b/catalog/dags/providers/provider_api_scripts/europeana.py index 5f52ea4d426..e26ac139b31 100644 --- a/catalog/dags/providers/provider_api_scripts/europeana.py +++ b/catalog/dags/providers/provider_api_scripts/europeana.py @@ -62,7 +62,7 @@ def get_record_data(self, data: dict) -> dict: try: record = { "foreign_landing_url": self._get_foreign_landing_url(data), - "image_url": self._get_image_url(data), + "url": self._get_image_url(data), 
"foreign_identifier": self._get_foreign_identifier(data), "meta_data": self._get_meta_data_dict(data), "title": self._get_title(data), diff --git a/catalog/dags/providers/provider_api_scripts/finnish_museums.py b/catalog/dags/providers/provider_api_scripts/finnish_museums.py index d0694b1dc73..730892b9489 100644 --- a/catalog/dags/providers/provider_api_scripts/finnish_museums.py +++ b/catalog/dags/providers/provider_api_scripts/finnish_museums.py @@ -136,7 +136,7 @@ def get_record_data(self, data): "license_info": get_license_info(license_url), "foreign_identifier": foreign_identifier, "foreign_landing_url": foreign_landing_url, - "image_url": image_url, + "url": image_url, "title": title, "source": source, "creator": creator, diff --git a/catalog/dags/providers/provider_api_scripts/flickr.py b/catalog/dags/providers/provider_api_scripts/flickr.py index 9fbd74597fb..3ac4ae0e2b7 100644 --- a/catalog/dags/providers/provider_api_scripts/flickr.py +++ b/catalog/dags/providers/provider_api_scripts/flickr.py @@ -222,7 +222,7 @@ def get_record_data(self, data): return None image_size = self._get_largest_image_size(data) - if (image_url := data.get(f"url_{image_size}")) is None: + if not (url := data.get(f"url_{image_size}")): return None if (foreign_id := data.get("id")) is None: @@ -254,7 +254,7 @@ def get_record_data(self, data): return { "foreign_landing_url": foreign_landing_url, - "image_url": image_url, + "url": url, "license_info": license_info, "foreign_identifier": foreign_id, "width": width, diff --git a/catalog/dags/providers/provider_api_scripts/freesound.py b/catalog/dags/providers/provider_api_scripts/freesound.py index 03572a262a0..bfd6c0b082f 100644 --- a/catalog/dags/providers/provider_api_scripts/freesound.py +++ b/catalog/dags/providers/provider_api_scripts/freesound.py @@ -203,7 +203,7 @@ def _get_audio_files( return None, None main_file = { - "audio_url": preview_url, + "url": preview_url, "filetype": self.preferred_preview.split("-")[-1], 
"bit_rate": FreesoundDataIngester.preview_bitrates[self.preferred_preview], "filesize": int(filesize), @@ -239,7 +239,7 @@ def get_record_data(self, media_data: dict) -> dict | list[dict] | None: if item_license is None: return None - # We use the mp3-hq preview url as `audio_url` as the main url + # We use the mp3-hq preview url as `url` as the main url # for playing on the frontend, # and the actual uploaded file as an alt_file that is available # for download (and requires a user to be authenticated to download) @@ -270,7 +270,7 @@ def get_record_data(self, media_data: dict) -> dict | list[dict] | None: "audio_set": audio_set, "set_url": set_url, "alt_files": alt_files, - # audio_url, filetype, bit_rate + # url, filetype, bit_rate **main_audio, } diff --git a/catalog/dags/providers/provider_api_scripts/jamendo.py b/catalog/dags/providers/provider_api_scripts/jamendo.py index a5725d2f705..6c7e15489f4 100644 --- a/catalog/dags/providers/provider_api_scripts/jamendo.py +++ b/catalog/dags/providers/provider_api_scripts/jamendo.py @@ -124,12 +124,12 @@ def _get_audio_url(self, data): >>> _remove_param_from_url(url, "from") 'https://prod-1.storage.jamendo.com/?trackid=1532771&format=mp31' :return: Tuple with main audio file information: - - audio_url + - url - duration (in milliseconds) """ - if (audio_url := data.get("audio")) is None: + if not (url := data.get("audio")): return None - return self._remove_param_from_url(audio_url, "from") + return self._remove_param_from_url(url, "from") @staticmethod def _get_creator_data(data): @@ -184,7 +184,7 @@ def get_record_data(self, data): if (foreign_landing_url := data.get("shareurl")) is None: return None - if (audio_url := self._get_audio_url(data)) is None: + if (url := self._get_audio_url(data)) is None: return None license_url = data.get("license_ccurl") @@ -224,7 +224,7 @@ def get_record_data(self, data): "creator_url": creator_url, "foreign_identifier": foreign_identifier, "foreign_landing_url": foreign_landing_url, 
- "audio_url": audio_url, + "url": url, "duration": duration, "filetype": filetype, "thumbnail_url": thumbnail, diff --git a/catalog/dags/providers/provider_api_scripts/metropolitan_museum.py b/catalog/dags/providers/provider_api_scripts/metropolitan_museum.py index 071f48434d5..5e0843c3bb3 100644 --- a/catalog/dags/providers/provider_api_scripts/metropolitan_museum.py +++ b/catalog/dags/providers/provider_api_scripts/metropolitan_museum.py @@ -114,7 +114,7 @@ def get_record_data(self, object_id): return [ { "foreign_landing_url": foreign_landing_url, - "image_url": img, + "url": img, "license_info": self.DEFAULT_LICENSE_INFO, "foreign_identifier": self._get_foreign_id(object_id, img), "creator": artist, diff --git a/catalog/dags/providers/provider_api_scripts/museum_victoria.py b/catalog/dags/providers/provider_api_scripts/museum_victoria.py index c28ad3d568b..74580e5bc8b 100644 --- a/catalog/dags/providers/provider_api_scripts/museum_victoria.py +++ b/catalog/dags/providers/provider_api_scripts/museum_victoria.py @@ -107,7 +107,7 @@ def _get_images(media_data) -> list[ImageDetails]: image: ImageDetails = { "foreign_identifier": image_id, - "image_url": image_url, + "url": image_url, "height": height, "width": width, "license_info": license_info, diff --git a/catalog/dags/providers/provider_api_scripts/nappy.py b/catalog/dags/providers/provider_api_scripts/nappy.py index 6338151546c..c8e3b85173b 100644 --- a/catalog/dags/providers/provider_api_scripts/nappy.py +++ b/catalog/dags/providers/provider_api_scripts/nappy.py @@ -93,7 +93,7 @@ def get_record_data(self, data: dict) -> dict | list[dict] | None: return { "foreign_landing_url": foreign_landing_url, - "image_url": image_url, + "url": image_url, "thumbnail_url": thumbnail_url, "license_info": self.license_info, "foreign_identifier": foreign_identifier, diff --git a/catalog/dags/providers/provider_api_scripts/nypl.py b/catalog/dags/providers/provider_api_scripts/nypl.py index f86e0222efb..f2473fd6294 100644 --- 
a/catalog/dags/providers/provider_api_scripts/nypl.py +++ b/catalog/dags/providers/provider_api_scripts/nypl.py @@ -52,7 +52,7 @@ class NyplDataIngester(ProviderDataIngester): # NYPL returns a list of image objects, with the dimension encoded # in the URL's query parameter. # This list is in order from the largest image to the smallest one. - image_url_dimensions = ["g", "v", "q", "w", "r"] + url_dimensions = ["g", "v", "q", "w", "r"] def __init__(self, *args, **kwargs): NYPL_API = Variable.get("API_KEY_NYPL") @@ -120,8 +120,8 @@ def get_record_data(self, data): continue image_link = capture.get("imageLinks", {}).get("imageLink", []) - image_url, filetype = self._get_image_data(image_link) - if not image_url: + url, filetype = self._get_image_data(image_link) + if not url: continue foreign_landing_url = capture.get("itemLink", {}).get("$") @@ -132,7 +132,7 @@ def get_record_data(self, data): image_data = { "foreign_identifier": image_id, "foreign_landing_url": foreign_landing_url, - "image_url": image_url, + "url": url, "license_info": get_license_info(license_url=license_url), "title": title, "creator": creator, @@ -171,9 +171,9 @@ def _get_image_data(images) -> tuple[None, None] | tuple[str, str]: "description": "Cropped .jpeg (1600 pixels on the long side)" } Selects the largest image based on the image URL's `t` query parameter - and image_url_dimensions. + and url_dimensions. """ - # Create a dict with the NyplDataIngester.image_url_dimensions as keys, + # Create a dict with the NyplDataIngester.url_dimensions as keys, # and image data as value. image_types = { parse_qs(urlparse(img["$"]).query)["t"][0]: i @@ -185,17 +185,17 @@ def _get_image_data(images) -> tuple[None, None] | tuple[str, str]: # Select the dict containing the URL for the largest image. # The image size is encoded in the URL query parameter `t`. # The list of dimensions is sorted by size of the corresponding image. 
- for dimension in NyplDataIngester.image_url_dimensions: + for dimension in NyplDataIngester.url_dimensions: preferred_image_index = image_types.get(dimension) if preferred_image_index is not None: preferred_image = images[preferred_image_index] # Removes the `download` query to get the viewable image URL - image_url = preferred_image["$"].replace("&download=1", "") + url = preferred_image["$"].replace("&download=1", "") filetype = NyplDataIngester._get_filetype( preferred_image["description"] ) - return image_url, filetype + return url, filetype return None, None diff --git a/catalog/dags/providers/provider_api_scripts/phylopic.py b/catalog/dags/providers/provider_api_scripts/phylopic.py index 8172eddba24..4a14522216d 100644 --- a/catalog/dags/providers/provider_api_scripts/phylopic.py +++ b/catalog/dags/providers/provider_api_scripts/phylopic.py @@ -110,7 +110,7 @@ def get_record_data(self, data: dict) -> dict | list[dict] | None: "license_info": get_license_info(license_url=license_url), "foreign_identifier": uid, "foreign_landing_url": foreign_url, - "image_url": img_url, + "url": img_url, "title": title, "creator": creator, "creator_url": creator_url, diff --git a/catalog/dags/providers/provider_api_scripts/rawpixel.py b/catalog/dags/providers/provider_api_scripts/rawpixel.py index acde719bb02..a988319c6cd 100644 --- a/catalog/dags/providers/provider_api_scripts/rawpixel.py +++ b/catalog/dags/providers/provider_api_scripts/rawpixel.py @@ -265,7 +265,7 @@ def get_record_data(self, data: dict) -> dict | list[dict] | None: width, height = self._get_image_properties(data) return { "foreign_landing_url": foreign_url, - "image_url": image_url, + "url": image_url, "license_info": license_info, "foreign_identifier": foreign_id, "width": width, diff --git a/catalog/dags/providers/provider_api_scripts/science_museum.py b/catalog/dags/providers/provider_api_scripts/science_museum.py index abd3aad873b..01c8df85526 100644 --- 
a/catalog/dags/providers/provider_api_scripts/science_museum.py +++ b/catalog/dags/providers/provider_api_scripts/science_museum.py @@ -131,12 +131,12 @@ def get_record_data(self, record): continue processed = image_data.get("processed") ( - image_url, + url, height, width, filetype, ) = self._get_image_info(processed) - if image_url is None: + if url is None: continue license_pair = self._get_license(image_data) @@ -149,7 +149,7 @@ def get_record_data(self, record): image = { "foreign_identifier": foreign_id, "foreign_landing_url": foreign_landing_url, - "image_url": image_url, + "url": url, "height": height, "width": width, "filetype": filetype, @@ -173,12 +173,12 @@ def _get_creator_info(attributes): return creator_info @staticmethod - def check_url(image_url: str | None) -> str | None: - if not image_url: + def check_url(url: str | None) -> str | None: + if not url: return None - if image_url.startswith("http"): - return image_url - return f"https://coimages.sciencemuseumgroup.org.uk/images/{image_url}" + if url.startswith("http"): + return url + return f"https://coimages.sciencemuseumgroup.org.uk/images/{url}" @staticmethod def _get_dimensions(image_data: dict) -> tuple[int | None, int | None]: @@ -206,11 +206,11 @@ def _get_image_info( if image_data is None: image_data = processed.get("medium", {}) - image_url = ScienceMuseumDataIngester.check_url(image_data.get("location")) - if image_url: + url = ScienceMuseumDataIngester.check_url(image_data.get("location")) + if url: filetype = image_data.get("format") height, width = ScienceMuseumDataIngester._get_dimensions(image_data) - return image_url, height, width, filetype + return url, height, width, filetype @staticmethod def _get_first_list_value(key: str, attributes: dict) -> str | None: diff --git a/catalog/dags/providers/provider_api_scripts/smithsonian.py b/catalog/dags/providers/provider_api_scripts/smithsonian.py index 62aeff083be..bd3b6fda968 100644 --- 
a/catalog/dags/providers/provider_api_scripts/smithsonian.py +++ b/catalog/dags/providers/provider_api_scripts/smithsonian.py @@ -299,7 +299,7 @@ def _get_associated_images(image_list, partial_image_data: dict) -> list: images.append( { **partial_image_data, - "image_url": image_url, + "url": image_url, "foreign_identifier": foreign_identifier, } ) diff --git a/catalog/dags/providers/provider_api_scripts/smk.py b/catalog/dags/providers/provider_api_scripts/smk.py index efd650c2863..ceef3af476a 100644 --- a/catalog/dags/providers/provider_api_scripts/smk.py +++ b/catalog/dags/providers/provider_api_scripts/smk.py @@ -108,7 +108,7 @@ def _get_images(item: dict) -> list: images.append( { "id": image_id, - "image_url": image_url, + "url": image_url, "thumbnail_url": thumbnail_url, "height": height, "width": width, diff --git a/catalog/dags/providers/provider_api_scripts/stocksnap.py b/catalog/dags/providers/provider_api_scripts/stocksnap.py index fd9b08a012f..1181432c942 100644 --- a/catalog/dags/providers/provider_api_scripts/stocksnap.py +++ b/catalog/dags/providers/provider_api_scripts/stocksnap.py @@ -99,7 +99,7 @@ def get_record_data(self, data): "creator_url": creator_url, "foreign_identifier": foreign_id, "foreign_landing_url": foreign_landing_url, - "image_url": image_url, + "url": image_url, "filesize": filesize, "filetype": "jpg", "height": height, diff --git a/catalog/dags/providers/provider_api_scripts/wikimedia_commons.py b/catalog/dags/providers/provider_api_scripts/wikimedia_commons.py index 0a95d85e3ae..2fa04b7ffe4 100644 --- a/catalog/dags/providers/provider_api_scripts/wikimedia_commons.py +++ b/catalog/dags/providers/provider_api_scripts/wikimedia_commons.py @@ -345,7 +345,7 @@ def get_record_data(self, record): @staticmethod def get_image_record_data(record_data, media_info): """Extend record_data with image-specific fields.""" - record_data["image_url"] = record_data.pop("media_url") + record_data["url"] = record_data.pop("media_url") if 
record_data["filetype"] == "svg": record_data["category"] = "illustration" @@ -357,7 +357,7 @@ def get_image_record_data(record_data, media_info): def get_audio_record_data(self, record_data, media_info): """Extend record_data with audio-specific fields.""" - record_data["audio_url"] = record_data.pop("media_url") + record_data["url"] = record_data.pop("media_url") duration = int(float(media_info.get("duration", 0)) * 1000) record_data["duration"] = duration diff --git a/catalog/dags/providers/provider_api_scripts/wordpress.py b/catalog/dags/providers/provider_api_scripts/wordpress.py index 1731dcd159c..a0bab4ab160 100644 --- a/catalog/dags/providers/provider_api_scripts/wordpress.py +++ b/catalog/dags/providers/provider_api_scripts/wordpress.py @@ -122,7 +122,7 @@ def get_record_data(self, data): "creator_url": author_url, "foreign_identifier": foreign_identifier, "foreign_landing_url": foreign_landing_url, - "image_url": image_url, + "url": image_url, "height": height, "width": width, "filesize": filesize, diff --git a/catalog/docs/data_models.md b/catalog/docs/data_models.md index 43482fff898..cc1e78cf2b2 100644 --- a/catalog/docs/data_models.md +++ b/catalog/docs/data_models.md @@ -9,12 +9,12 @@ Catalog data models. ## Required Fields -| field name | description | -| ------------------------- || -| _foreign_identifier_ | Unique identifier for the record on the source site. | -| _foreign_landing_url_ | URL of page where the record lives on the source website. | -| _audio_url_ / _image_url_ | Direct link to the media file. Note that until [issue #784 is addressed](https://github.com/WordPress/openverse-catalog/issues/784) the field name differs depending on media type. 
| -| _license_info_ | [LicenseInfo object](https://github.com/WordPress/openverse-catalog/blob/8423590fd86a0a3272ca91bc11f2f37979048181/openverse_catalog/dags/common/licenses/licenses.py#L25) that has (1) the URL of the license for the record, (2) string representation of the license, (3) version of the license, (4) raw license URL that was by provider, if different from canonical URL. Usually generated by calling [`get_license_info`](https://github.com/WordPress/openverse-catalog/blob/8423590fd86a0a3272ca91bc11f2f37979048181/openverse_catalog/dags/common/licenses/licenses.py#L29) on respective fields returns/available from the API. | +| field name | description | +| --------------------- || +| _foreign_identifier_ | Unique identifier for the record on the source site. | +| _foreign_landing_url_ | URL of page where the record lives on the source website. | +| _url_ | Direct link to the media file. | +| _license_info_ | [LicenseInfo object](https://github.com/WordPress/openverse-catalog/blob/8423590fd86a0a3272ca91bc11f2f37979048181/openverse_catalog/dags/common/licenses/licenses.py#L25) that has (1) the URL of the license for the record, (2) string representation of the license, (3) version of the license, (4) raw license URL that was provided by the provider, if different from canonical URL. Usually generated by calling [`get_license_info`](https://github.com/WordPress/openverse-catalog/blob/8423590fd86a0a3272ca91bc11f2f37979048181/openverse_catalog/dags/common/licenses/licenses.py#L29) on the respective fields returned by or available from the API. 
| ## Optional Fields diff --git a/catalog/templates/template_provider.py_template b/catalog/templates/template_provider.py_template index 1adab17ec2a..4fecc1fe084 100644 --- a/catalog/templates/template_provider.py_template +++ b/catalog/templates/template_provider.py_template @@ -89,19 +89,17 @@ class {provider}DataIngester(ProviderDataIngester): # - foreign_identifier # - foreign_landing_url # - license_info - # - image_url / audio_url + # - url # - # If a required field is missing, return early to prevent unnecesary + # If a required field is missing, return early to prevent unnecessary # processing. - if (foreign_identifier := data.get("foreign_id")) is None: + if not (foreign_identifier := data.get("foreign_id")): return None - if (foreign_landing_url := data.get("url")) is None: + if not (foreign_landing_url := data.get("url")): return None - # TODO: Note the url field name differs depending on field type. Append - # `image_url` or `audio_url` depending on the type of record being processed. - if (image_url := data.get("image_url")) is None: + if not (url := data.get("url")): return None # Use the `get_license_info` utility to get license information from a URL. 
@@ -136,7 +134,7 @@ class {provider}DataIngester(ProviderDataIngester): return { "foreign_landing_url": foreign_landing_url, - "image_url": image_url, + "url": url, "license_info": license_info, # Optional fields "foreign_identifier": foreign_identifier, diff --git a/catalog/tests/dags/common/storage/test_audio.py b/catalog/tests/dags/common/storage/test_audio.py index ce278978476..befe6cce396 100644 --- a/catalog/tests/dags/common/storage/test_audio.py +++ b/catalog/tests/dags/common/storage/test_audio.py @@ -18,7 +18,7 @@ mock_audio_args = { "foreign_landing_url": "https://landing_page.com", - "audio_url": "https://audiourl.com", + "url": "https://audiourl.com", "license_info": BY_LICENSE_INFO, "foreign_identifier": "foreign_id", "thumbnail_url": "https://thumbnail.com", @@ -60,7 +60,7 @@ def test_AudioStore_add_item_adds_realistic_audio_to_buffer(): audio_store.add_item( foreign_identifier="01", foreign_landing_url="https://audios.org/audio01", - audio_url="https://audios.org/audio01.jpg", + url="https://audios.org/audio01.jpg", license_info=license_info, ingestion_type="provider_api", ) @@ -72,25 +72,25 @@ def test_AudioStore_add_item_adds_multiple_audios_to_buffer(): audio_store.add_item( foreign_identifier="01", foreign_landing_url="https://audios.org/audio01", - audio_url="https://audios.org/audio01.jpg", + url="https://audios.org/audio01.jpg", license_info=PD_LICENSE_INFO, ) audio_store.add_item( foreign_identifier="02", foreign_landing_url="https://audios.org/audio02", - audio_url="https://audios.org/audio02.jpg", + url="https://audios.org/audio02.jpg", license_info=PD_LICENSE_INFO, ) audio_store.add_item( foreign_identifier="03", foreign_landing_url="https://audios.org/audio03", - audio_url="https://audios.org/audio03.jpg", + url="https://audios.org/audio03.jpg", license_info=PD_LICENSE_INFO, ) audio_store.add_item( foreign_identifier="04", foreign_landing_url="https://audios.org/audio04", - audio_url="https://audios.org/audio04.jpg", + 
url="https://audios.org/audio04.jpg", license_info=PD_LICENSE_INFO, ) assert len(audio_store._media_buffer) == 4 @@ -104,25 +104,25 @@ def test_AudioStore_add_item_flushes_buffer(tmpdir): audio_store.add_item( foreign_identifier="01", foreign_landing_url="https://audios.org/audio01", - audio_url="https://audios.org/audio01.jpg", + url="https://audios.org/audio01.jpg", license_info=PD_LICENSE_INFO, ) audio_store.add_item( foreign_identifier="02", foreign_landing_url="https://audios.org/audio02", - audio_url="https://audios.org/audio02.jpg", + url="https://audios.org/audio02.jpg", license_info=PD_LICENSE_INFO, ) audio_store.add_item( foreign_identifier="03", foreign_landing_url="https://audios.org/audio03", - audio_url="https://audios.org/audio03.jpg", + url="https://audios.org/audio03.jpg", license_info=PD_LICENSE_INFO, ) audio_store.add_item( foreign_identifier="04", foreign_landing_url="https://audios.org/audio04", - audio_url="https://audios.org/audio04.jpg", + url="https://audios.org/audio04.jpg", license_info=PD_LICENSE_INFO, ) assert len(audio_store._media_buffer) == 1 @@ -141,19 +141,19 @@ def test_AudioStore_produces_correct_total_audios(): audio_store.add_item( foreign_identifier="01", foreign_landing_url="https://audios.org/audio01", - audio_url="https://audios.org/audio01.jpg", + url="https://audios.org/audio01.jpg", license_info=PD_LICENSE_INFO, ) audio_store.add_item( foreign_identifier="02", foreign_landing_url="https://audios.org/audio02", - audio_url="https://audios.org/audio02.jpg", + url="https://audios.org/audio02.jpg", license_info=PD_LICENSE_INFO, ) audio_store.add_item( foreign_identifier="03", foreign_landing_url="https://audios.org/audio03", - audio_url="https://audios.org/audio03.jpg", + url="https://audios.org/audio03.jpg", license_info=PD_LICENSE_INFO, ) assert audio_store.total_items == 3 diff --git a/catalog/tests/dags/common/storage/test_image.py b/catalog/tests/dags/common/storage/test_image.py index ecf0ad15902..0dd2de7f5b3 100644 --- 
a/catalog/tests/dags/common/storage/test_image.py +++ b/catalog/tests/dags/common/storage/test_image.py @@ -28,7 +28,7 @@ def test_ImageStore_add_item_adds_realistic_image_to_buffer(setup_env): image_store.add_item( foreign_identifier="01", foreign_landing_url="https://images.org/image01", - image_url="https://images.org/image01.jpg", + url="https://images.org/image01.jpg", license_info=PD_LICENSE_INFO, ) assert len(image_store._media_buffer) == 1 @@ -41,25 +41,25 @@ def test_ImageStore_add_item_adds_multiple_images_to_buffer( image_store.add_item( foreign_identifier="01", foreign_landing_url="https://images.org/image01", - image_url="https://images.org/image01.jpg", + url="https://images.org/image01.jpg", license_info=PD_LICENSE_INFO, ) image_store.add_item( foreign_identifier="02", foreign_landing_url="https://images.org/image02", - image_url="https://images.org/image02.jpg", + url="https://images.org/image02.jpg", license_info=PD_LICENSE_INFO, ) image_store.add_item( foreign_identifier="03", foreign_landing_url="https://images.org/image03", - image_url="https://images.org/image03.jpg", + url="https://images.org/image03.jpg", license_info=PD_LICENSE_INFO, ) image_store.add_item( foreign_identifier="04", foreign_landing_url="https://images.org/image04", - image_url="https://images.org/image04.jpg", + url="https://images.org/image04.jpg", license_info=PD_LICENSE_INFO, ) assert len(image_store._media_buffer) == 4 @@ -71,7 +71,7 @@ def test_ImageStore_get_image_places_given_args( image_store = image.ImageStore(provider="testing_provider") args_dict = { "foreign_landing_url": "https://landing_page.com", - "image_url": "https://imageurl.com", + "url": "https://imageurl.com", "license_info": BY_LICENSE_INFO, "foreign_identifier": "foreign_id", "thumbnail_url": "https://thumbnail.com", @@ -106,7 +106,6 @@ def mock_enrich_tags(tags): args_dict["filesize"] = 1000 args_dict["license_"] = args_dict.get("license_info").license args_dict["license_version"] = 
args_dict.pop("license_info").version - args_dict["url"] = args_dict.pop("image_url") assert actual_image == image.Image(**args_dict) diff --git a/catalog/tests/dags/common/storage/test_media.py b/catalog/tests/dags/common/storage/test_media.py index 2f0419dd1f4..e79946cf88c 100644 --- a/catalog/tests/dags/common/storage/test_media.py +++ b/catalog/tests/dags/common/storage/test_media.py @@ -35,7 +35,7 @@ TEST_IMAGE_DICT = { "foreign_landing_url": None, - "image_url": None, + "url": None, "thumbnail_url": None, "filetype": None, "filesize": None, @@ -66,7 +66,7 @@ TEST_REQUIRED_FIELDS = { "foreign_landing_url": TEST_FOREIGN_LANDING_URL, "foreign_identifier": "02", - "image_url": TEST_IMAGE_URL, + "url": TEST_IMAGE_URL, "license_info": BY_LICENSE_INFO, } @@ -120,25 +120,25 @@ def test_MediaStore_add_item_flushes_buffer(tmpdir): image_store.add_item( foreign_identifier="01", foreign_landing_url="https://images.org/image01", - image_url="https://images.org/image01.jpg", + url="https://images.org/image01.jpg", license_info=PD_LICENSE_INFO, ) image_store.add_item( foreign_identifier="02", foreign_landing_url="https://images.org/image02", - image_url="https://images.org/image02.jpg", + url="https://images.org/image02.jpg", license_info=PD_LICENSE_INFO, ) image_store.add_item( foreign_identifier="03", foreign_landing_url="https://images.org/image03", - image_url="https://images.org/image03.jpg", + url="https://images.org/image03.jpg", license_info=PD_LICENSE_INFO, ) image_store.add_item( foreign_identifier="04", foreign_landing_url="https://images.org/image04", - image_url="https://images.org/image04.jpg", + url="https://images.org/image04.jpg", license_info=PD_LICENSE_INFO, ) assert len(image_store._media_buffer) == 1 @@ -157,19 +157,19 @@ def test_MediaStore_produces_correct_total_images(): image_store.add_item( foreign_identifier="01", foreign_landing_url="https://images.org/image01", - image_url="https://images.org/image01.jpg", + url="https://images.org/image01.jpg", 
license_info=PD_LICENSE_INFO, ) image_store.add_item( foreign_identifier="02", foreign_landing_url="https://images.org/image02", - image_url="https://images.org/image02.jpg", + url="https://images.org/image02.jpg", license_info=PD_LICENSE_INFO, ) image_store.add_item( foreign_identifier="03", foreign_landing_url="https://images.org/image03", - image_url="https://images.org/image03.jpg", + url="https://images.org/image03.jpg", license_info=PD_LICENSE_INFO, ) assert image_store.total_items == 3 @@ -187,7 +187,7 @@ def test_MediaStore_clean_media_metadata_does_not_change_required_media_argument } cleaned_data = image_store.clean_media_metadata(**image_data) - assert cleaned_data["image_url"] == TEST_IMAGE_URL + assert cleaned_data["url"] == TEST_IMAGE_URL assert cleaned_data["foreign_landing_url"] == TEST_FOREIGN_LANDING_URL @@ -262,9 +262,7 @@ def test_MediaStore_clean_media_metadata_does_not_replace_category_with_default( assert cleaned_data["category"] == prov.ImageCategory.PHOTOGRAPH -@pytest.mark.parametrize( - "field", ("foreign_identifier", "foreign_landing_url", "image_url") -) +@pytest.mark.parametrize("field", ("foreign_identifier", "foreign_landing_url", "url")) def test_MediaStore_clean_media_metadata_raises_when_missing_required_field( field, ): @@ -315,7 +313,7 @@ def test_MediaStore_clean_media_strips_url_trailing_slashes( image_store = image.ImageStore(strip_url_trailing_slashes=remove_slashes) test_data = { "foreign_landing_url": input_url, - "image_url": input_url, + "url": input_url, "thumbnail_url": input_url, "creator_url": input_url, } @@ -332,7 +330,7 @@ def test_MediaStore_get_image_gets_source(monkeypatch): actual_image = image_store._get_image( license_info=BY_LICENSE_INFO, foreign_landing_url=TEST_FOREIGN_LANDING_URL, - image_url=TEST_IMAGE_URL, + url=TEST_IMAGE_URL, thumbnail_url=None, filetype=None, filesize=None, @@ -358,7 +356,7 @@ def test_MediaStore_sets_source_to_provider_if_source_is_none(monkeypatch): actual_image = 
image_store._get_image( license_info=BY_LICENSE_INFO, foreign_landing_url=TEST_FOREIGN_LANDING_URL, - image_url=TEST_IMAGE_URL, + url=TEST_IMAGE_URL, thumbnail_url=None, filetype=None, filesize=1000, @@ -388,7 +386,7 @@ def item_saver(arg): image_store.add_item( license_info=BY_LICENSE_INFO, foreign_landing_url=TEST_FOREIGN_LANDING_URL, - image_url=TEST_IMAGE_URL, + url=TEST_IMAGE_URL, thumbnail_url=None, foreign_identifier="02", width=None, @@ -424,7 +422,7 @@ def item_saver(arg): image_store.add_item( license_info=LicenseInfo("by", "4.0", valid_license_url, license_url), foreign_landing_url=TEST_FOREIGN_LANDING_URL, - image_url=TEST_IMAGE_URL, + url=TEST_IMAGE_URL, thumbnail_url=None, foreign_identifier="02", width=None, @@ -461,7 +459,7 @@ def item_saver(arg): image_store.add_item( license_info=LicenseInfo("by", "4.0", valid_license_url, license_url), foreign_landing_url=TEST_FOREIGN_LANDING_URL, - image_url=TEST_IMAGE_URL, + url=TEST_IMAGE_URL, foreign_identifier="02", width=None, height=None, @@ -496,7 +494,7 @@ def item_saver(arg): image_store.add_item( license_info=LicenseInfo("by-nc-sa", "2.0", updated_url, original_url), foreign_landing_url=TEST_FOREIGN_LANDING_URL, - image_url=TEST_IMAGE_URL, + url=TEST_IMAGE_URL, foreign_identifier="02", meta_data={}, ) @@ -518,7 +516,7 @@ def test_MediaStore_get_image_enriches_singleton_tags(): license_url="https://license/url", ), foreign_landing_url=TEST_FOREIGN_LANDING_URL, - image_url=TEST_IMAGE_URL, + url=TEST_IMAGE_URL, thumbnail_url=None, filetype=None, filesize=None, @@ -556,7 +554,7 @@ def test_MediaStore_get_image_tag_blacklist(): license_version="4.0", ), foreign_landing_url=TEST_FOREIGN_LANDING_URL, - image_url=TEST_IMAGE_URL, + url=TEST_IMAGE_URL, meta_data=None, raw_tags=raw_tags, category=None, @@ -585,7 +583,7 @@ def test_MediaStore_get_image_enriches_multiple_tags(): license_version="4.0", ), foreign_landing_url=TEST_FOREIGN_LANDING_URL, - image_url=TEST_IMAGE_URL, + url=TEST_IMAGE_URL, 
thumbnail_url=None, filetype=None, filesize=None, @@ -625,7 +623,7 @@ def test_MediaStore_get_image_leaves_preenriched_tags(setup_env): license_version="4.0", ), foreign_landing_url=TEST_FOREIGN_LANDING_URL, - image_url=TEST_IMAGE_URL, + url=TEST_IMAGE_URL, thumbnail_url=None, filetype=None, filesize=None, @@ -657,7 +655,7 @@ def test_MediaStore_get_image_nones_nonlist_tags(): license_version="4.0", ), foreign_landing_url=TEST_FOREIGN_LANDING_URL, - image_url=TEST_IMAGE_URL, + url=TEST_IMAGE_URL, thumbnail_url=None, filetype=None, filesize=None, @@ -684,7 +682,7 @@ def test_MediaStore_get_image_nones_nonlist_tags(): # Does not use extracted extension if it's not a valid image extension. # Uses the `filetype` even if the `url` extension is different. @pytest.mark.parametrize( - "filetype, image_url, expected_filetype", + "filetype, url, expected_filetype", [ (None, "https://example.com/image.jpg", "jpg"), (None, "https://example.com/image.jpeg", "jpg"), @@ -693,13 +691,13 @@ def test_MediaStore_get_image_nones_nonlist_tags(): ("jpeg", "https://example.com/image.gif", "jpg"), ], ) -def test_MediaStore_validates_filetype(filetype, image_url, expected_filetype): +def test_MediaStore_validates_filetype(filetype, url, expected_filetype): image_store = image.MockImageStore("test_provider") test_image_args = TEST_IMAGE_DICT | { "license_info": BY_LICENSE_INFO, "foreign_landing_url": "https://example.com/image.html", "foreign_identifier": "image1", - "image_url": image_url, + "url": url, "filetype": filetype, } test_image_args.pop("thumbnail_url") @@ -722,7 +720,7 @@ def test_MediaStore_validates_filesize(value, expected): "license_info": BY_LICENSE_INFO, "foreign_landing_url": "https://example.com/image.html", "foreign_identifier": "image1", - "image_url": TEST_IMAGE_URL, + "url": TEST_IMAGE_URL, "filesize": value, } test_image_args.pop("thumbnail_url") diff --git a/catalog/tests/dags/common/test_tsv_cleaner.py b/catalog/tests/dags/common/test_tsv_cleaner.py index 
c7c641eceb0..6a5ed9a64dc 100644 --- a/catalog/tests/dags/common/test_tsv_cleaner.py +++ b/catalog/tests/dags/common/test_tsv_cleaner.py @@ -29,7 +29,7 @@ def test_clean_tsv_cleans_tsv_rows(tmpdir): call(provider="test_provider"), call().add_item( foreign_landing_url="https://example.com/landing1", - image_url="https://example.com/image1", + url="https://example.com/image1", thumbnail_url="https://example.com/thumbnail1", license_info=by_license, foreign_identifier="one", @@ -54,7 +54,7 @@ def test_clean_tsv_cleans_tsv_rows(tmpdir): call(provider="next_provider"), call().add_item( foreign_landing_url="https://example.com/landing2", - image_url="https://example.com/image2", + url="https://example.com/image2", thumbnail_url="https://example.com/thumbnail2", license_info=by_nc_license, foreign_identifier="two", diff --git a/catalog/tests/dags/providers/provider_api_scripts/resources/inaturalist/full_db_response.txt b/catalog/tests/dags/providers/provider_api_scripts/resources/inaturalist/full_db_response.txt index 66e613bd5cd..9d2b9040915 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/resources/inaturalist/full_db_response.txt +++ b/catalog/tests/dags/providers/provider_api_scripts/resources/inaturalist/full_db_response.txt @@ -1 +1 @@ -[([{'foreign_identifier': 10314159, 'width': 1530, 'height': 2048, 'title': 'Trifolium hybridum', 'raw_tags': ['Fabaceae', 'Fabales', 'Magnoliopsida', 'Angiospermae', 'Plantae', 'Trifolium', 'Tracheophyta', 'Faboideae', 'Trifolieae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/10314159', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/10314159/medium.jpg', 'creator': 'akjenny', 'creator_url': 'https://www.inaturalist.org/users/615549'}, {'foreign_identifier': 20314159, 'width': 2048, 'height': 1955, 'title': 'Lonicera sempervirens', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Dipsacales', 
'Caprifoliaceae', 'Lonicera', 'Tracheophyta', 'Caprifolioideae', 'Caprifolium'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/20314159', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/20314159/medium.jpeg', 'creator': 'drshawntdash', 'creator_url': 'https://www.inaturalist.org/users/11861'}, {'foreign_identifier': 30314159, 'width': 2048, 'height': 1360, 'title': 'Ladona julia', 'raw_tags': ['Animalia', 'Arthropoda', 'Insecta', 'Odonata', 'Libellulidae', 'Anisoptera', 'Ladona', 'Pterygota', 'Hexapoda', 'Libelluloidea'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/30314159', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/30314159/medium.jpeg', 'creator': 'parzudak', 'creator_url': 'https://www.inaturalist.org/users/813200'}, {'foreign_identifier': 40314159, 'width': 1024, 'height': 683, 'title': 'Leucochrysum stipitatum', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Asteraceae', 'Asterales', 'Leucochrysum', 'Tracheophyta', 'Gnaphalieae', 'Asteroideae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc-sa/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/40314159', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/40314159/medium.jpeg', 'creator': 'kenw', 'creator_url': 'https://www.inaturalist.org/users/1623'}, {'foreign_identifier': 60314159, 'width': 1536, 'height': 2048, 'title': 'Arion ater', 'raw_tags': ['Animalia', 'Gastropoda', 'Mollusca', 'Stylommatophora', 'Arionidae', 'Arion', 'Arionoidea', 'Arion', 'Heterobranchia', 'Euthyneura', 'Tectipleura', 'Eupulmonata', 'Helicina', 'Arionoidei'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/60314159', 
'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/60314159/medium.jpeg', 'creator': 'max_hof_mann', 'creator_url': 'https://www.inaturalist.org/users/775177'}, {'foreign_identifier': 80314159, 'width': 1536, 'height': 2048, 'title': 'Insecta', 'raw_tags': ['Animalia', 'Arthropoda', 'Hexapoda'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/80314159', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/80314159/medium.jpg', 'creator': 'sheilacro', 'creator_url': 'https://www.inaturalist.org/users/3225310'}, {'foreign_identifier': 90314159, 'width': 1350, 'height': 900, 'title': 'Herpetogramma', 'raw_tags': ['Animalia', 'Arthropoda', 'Lepidoptera', 'Insecta', 'Pyraloidea', 'Crambidae', 'Spilomelinae', 'Pterygota', 'Hexapoda', 'Herpetogrammatini'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc-nd/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/90314159', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/90314159/medium.jpeg', 'creator': 'natureguy', 'creator_url': 'https://www.inaturalist.org/users/134993'}, {'foreign_identifier': 110314159, 'width': 2048, 'height': 1365, 'title': 'Cladonia', 'raw_tags': ['Fungi', 'Ascomycota', 'Lecanorales', 'Cladoniaceae', 'Lecanoromycetes', 'Pezizomycotina', 'Lecanoromycetidae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/110314159', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/110314159/medium.jpeg', 'creator': 'sichonik', 'creator_url': 'https://www.inaturalist.org/users/3892316'}, {'foreign_identifier': 120314159, 'width': 792, 'height': 653, 'title': 'Sarcophagidae', 'raw_tags': ['Animalia', 'Arthropoda', 'Insecta', 'Diptera', 'Brachycera', 'Pterygota', 'Calyptratae', 'Oestroidea', 'Hexapoda', 'Cyclorrhapha'], 
'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/120314159', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/120314159/medium.jpg', 'creator': 'zealouswizard', 'creator_url': 'https://www.inaturalist.org/users/3234999'}, {'foreign_identifier': 130314159, 'width': 1024, 'height': 2048, 'title': 'Senecio vernalis', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Asteraceae', 'Asterales', 'Senecio', 'Tracheophyta', 'Senecioneae', 'Asteroideae', 'Senecioninae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/130314159', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/130314159/medium.jpeg', 'creator': 'solokultas', 'creator_url': 'https://www.inaturalist.org/users/1198134'}, {'foreign_identifier': 150314159, 'width': 1536, 'height': 2048, 'title': 'Matricaria discoidea', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Asteraceae', 'Asterales', 'Matricaria', 'Tracheophyta', 'Anthemideae', 'Asteroideae', 'Matricariinae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/150314159', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/150314159/medium.jpeg', 'creator': 'tularosatabulator', 'creator_url': 'https://www.inaturalist.org/users/3586703'}, {'foreign_identifier': 160314159, 'width': 1536, 'height': 2048, 'title': 'Sciurus granatensis', 'raw_tags': ['Animalia', 'Chordata', 'Mammalia', 'Rodentia', 'Sciuridae', 'Sciurus', 'Sciuromorpha', 'Sciurinae', 'Sciurini', 'Vertebrata', 'Theria', 'Placentalia', 'Euarchontoglires'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/160314159', 'image_url': 
'https://inaturalist-open-data.s3.amazonaws.com/photos/160314159/medium.jpg', 'creator': 'mp_velez87', 'creator_url': 'https://www.inaturalist.org/users/5040484'}, {'foreign_identifier': 170314159, 'width': 1536, 'height': 2048, 'title': 'Bacopa monnieri', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Lamiales', 'Plantaginaceae', 'Bacopa', 'Tracheophyta', 'Gratioleae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/170314159', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/170314159/medium.jpg', 'creator': 'calebhelsel', 'creator_url': 'https://www.inaturalist.org/users/3314770'}, {'foreign_identifier': 190995604, 'width': 2048, 'height': 1536, 'title': 'Arenaria interpres', 'raw_tags': ['Animalia', 'Chordata', 'Aves', 'Scolopacidae', 'Arenaria', 'Charadriiformes', 'Vertebrata'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/190995604', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/190995604/medium.jpeg', 'creator': 'raymie', 'creator_url': 'https://www.inaturalist.org/users/765334'}, {'foreign_identifier': 190995604, 'width': 2048, 'height': 1536, 'title': 'Dreissena polymorpha', 'raw_tags': ['Animalia', 'Mollusca', 'Bivalvia', 'Myida', 'Dreissenidae', 'Dreissena', 'Euheterodonta', 'Imparidentia', 'Dreissenoidea', 'Autobranchia', 'Heteroconchia', 'Dreisseninae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/190995604', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/190995604/medium.jpeg', 'creator': 'raymie', 'creator_url': 'https://www.inaturalist.org/users/765334'}, {'foreign_identifier': 190995656, 'width': 2048, 'height': 1536, 'title': 'Dreissena polymorpha', 'raw_tags': ['Animalia', 'Mollusca', 
'Bivalvia', 'Myida', 'Dreissenidae', 'Dreissena', 'Euheterodonta', 'Imparidentia', 'Dreissenoidea', 'Autobranchia', 'Heteroconchia', 'Dreisseninae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/190995656', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/190995656/medium.jpeg', 'creator': 'raymie', 'creator_url': 'https://www.inaturalist.org/users/765334'}, {'foreign_identifier': 190995656, 'width': 2048, 'height': 1536, 'title': 'Arenaria interpres', 'raw_tags': ['Animalia', 'Chordata', 'Aves', 'Scolopacidae', 'Arenaria', 'Charadriiformes', 'Vertebrata'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/190995656', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/190995656/medium.jpeg', 'creator': 'raymie', 'creator_url': 'https://www.inaturalist.org/users/765334'}, {'foreign_identifier': 191001500, 'width': 2048, 'height': 1536, 'title': 'Pachypsylla venusta', 'raw_tags': ['Animalia', 'Arthropoda', 'Insecta', 'Hemiptera', 'Psylloidea', 'Pterygota', 'Pachypsylla', 'Sternorrhyncha', 'Hexapoda', 'Aphalaridae', 'Pachypsyllinae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191001500', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191001500/medium.jpeg', 'creator': 'cosmiccat', 'creator_url': 'https://www.inaturalist.org/users/34728'}, {'foreign_identifier': 191001500, 'width': 2048, 'height': 1536, 'title': 'Celtis laevigata', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Rosales', 'Cannabaceae', 'Celtis', 'Tracheophyta'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191001500', 'image_url': 
'https://inaturalist-open-data.s3.amazonaws.com/photos/191001500/medium.jpeg', 'creator': 'cosmiccat', 'creator_url': 'https://www.inaturalist.org/users/34728'}, {'foreign_identifier': 191012628, 'width': 1152, 'height': 2048, 'title': 'Pseudacris fouquettei', 'raw_tags': ['Animalia', 'Chordata', 'Amphibia', 'Anura', 'Hylidae', 'Pseudacris', 'Vertebrata', 'Acridinae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191012628', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191012628/medium.jpg', 'creator': 'kaptainkory', 'creator_url': 'https://www.inaturalist.org/users/22614'}, {'foreign_identifier': 191012628, 'width': 1152, 'height': 2048, 'title': 'Pseudacris crucifer', 'raw_tags': ['Animalia', 'Chordata', 'Amphibia', 'Anura', 'Hylidae', 'Pseudacris', 'Vertebrata', 'Acridinae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191012628', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191012628/medium.jpg', 'creator': 'kaptainkory', 'creator_url': 'https://www.inaturalist.org/users/22614'}, {'foreign_identifier': 191015484, 'width': 2048, 'height': 1684, 'title': 'Lantana camara', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Lamiales', 'Verbenaceae', 'Lantana', 'Tracheophyta', 'Lantaneae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191015484', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191015484/medium.jpeg', 'creator': 'bushboy', 'creator_url': 'https://www.inaturalist.org/users/662724'}, {'foreign_identifier': 191015484, 'width': 2048, 'height': 1684, 'title': 'Aporiina', 'raw_tags': ['Animalia', 'Arthropoda', 'Lepidoptera', 'Insecta', 'Papilionoidea', 'Pieridae', 'Pterygota', 'Pierinae', 
'Pierini', 'Hexapoda'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191015484', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191015484/medium.jpeg', 'creator': 'bushboy', 'creator_url': 'https://www.inaturalist.org/users/662724'}, {'foreign_identifier': 191015547, 'width': 2048, 'height': 1536, 'title': 'Aporiina', 'raw_tags': ['Animalia', 'Arthropoda', 'Lepidoptera', 'Insecta', 'Papilionoidea', 'Pieridae', 'Pterygota', 'Pierinae', 'Pierini', 'Hexapoda'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191015547', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191015547/medium.jpeg', 'creator': 'bushboy', 'creator_url': 'https://www.inaturalist.org/users/662724'}, {'foreign_identifier': 191015547, 'width': 2048, 'height': 1536, 'title': 'Lantana camara', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Lamiales', 'Verbenaceae', 'Lantana', 'Tracheophyta', 'Lantaneae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191015547', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191015547/medium.jpeg', 'creator': 'bushboy', 'creator_url': 'https://www.inaturalist.org/users/662724'}, {'foreign_identifier': 191016506, 'width': 2048, 'height': 1536, 'title': 'Gambusia holbrooki', 'raw_tags': ['Animalia', 'Chordata', 'Actinopterygii', 'Cyprinodontiformes', 'Poeciliidae', 'Gambusia', 'Vertebrata', 'Poeciliinae', 'Cyprinodontoidei'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191016506', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191016506/medium.jpeg', 'creator': 'kingmush', 'creator_url': 
'https://www.inaturalist.org/users/3453019'}, {'foreign_identifier': 191016506, 'width': 2048, 'height': 1536, 'title': 'Panicum repens', 'raw_tags': ['Angiospermae', 'Plantae', 'Poales', 'Liliopsida', 'Poaceae', 'Panicum', 'Tracheophyta', 'Paniceae', 'Panicoideae', 'Panicinae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191016506', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191016506/medium.jpeg', 'creator': 'kingmush', 'creator_url': 'https://www.inaturalist.org/users/3453019'}, {'foreign_identifier': 191018870, 'width': 1363, 'height': 2048, 'title': 'Toxicodendron diversilobum', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Sapindales', 'Anacardiaceae', 'Toxicodendron', 'Tracheophyta', 'Anacardioideae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191018870', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191018870/medium.jpg', 'creator': 'tiwane', 'creator_url': 'https://www.inaturalist.org/users/28'}, {'foreign_identifier': 191018870, 'width': 1363, 'height': 2048, 'title': 'Pedicularis densiflora', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Orobanchaceae', 'Lamiales', 'Pedicularis', 'Tracheophyta', 'Pedicularideae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191018870', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191018870/medium.jpg', 'creator': 'tiwane', 'creator_url': 'https://www.inaturalist.org/users/28'}, {'foreign_identifier': 191018903, 'width': 818, 'height': 741, 'title': 'Ranunculus occidentalis', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Ranunculus', 'Ranunculaceae', 'Ranunculales', 'Tracheophyta', 'Ranunculoideae', 'Ranunculeae'], 
'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191018903', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191018903/medium.jpg', 'creator': 'tiwane', 'creator_url': 'https://www.inaturalist.org/users/28'}, {'foreign_identifier': 191018903, 'width': 818, 'height': 741, 'title': 'Andrena', 'raw_tags': ['Animalia', 'Arthropoda', 'Insecta', 'Hymenoptera', 'Apoidea', 'Andrenidae', 'Apocrita', 'Pterygota', 'Aculeata', 'Hexapoda', 'Andreninae', 'Andrenini'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191018903', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191018903/medium.jpg', 'creator': 'tiwane', 'creator_url': 'https://www.inaturalist.org/users/28'}, {'foreign_identifier': 191019692, 'width': 1800, 'height': 1200, 'title': 'Mareca americana', 'raw_tags': ['Animalia', 'Chordata', 'Aves', 'Anseriformes', 'Anatidae', 'Vertebrata', 'Mareca'], 'filetype': 'png', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191019692', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191019692/medium.png', 'creator': 'dabee', 'creator_url': 'https://www.inaturalist.org/users/4024764'}, {'foreign_identifier': 191019692, 'width': 1800, 'height': 1200, 'title': 'Anas platyrhynchos', 'raw_tags': ['Animalia', 'Chordata', 'Aves', 'Anseriformes', 'Anatidae', 'Anas', 'Vertebrata'], 'filetype': 'png', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191019692', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191019692/medium.png', 'creator': 'dabee', 'creator_url': 'https://www.inaturalist.org/users/4024764'}, {'foreign_identifier': 200352737, 'width': 1536, 'height': 2048, 
'title': 'Plantae', 'raw_tags': None, 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/200352737', 'image_url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/200352737/medium.jpg', 'creator': 'sikstro1', 'creator_url': 'https://www.inaturalist.org/users/2019940'}],)] +[([{'foreign_identifier': 10314159, 'width': 1530, 'height': 2048, 'title': 'Trifolium hybridum', 'raw_tags': ['Fabaceae', 'Fabales', 'Magnoliopsida', 'Angiospermae', 'Plantae', 'Trifolium', 'Tracheophyta', 'Faboideae', 'Trifolieae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/10314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/10314159/medium.jpg', 'creator': 'akjenny', 'creator_url': 'https://www.inaturalist.org/users/615549'}, {'foreign_identifier': 20314159, 'width': 2048, 'height': 1955, 'title': 'Lonicera sempervirens', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Dipsacales', 'Caprifoliaceae', 'Lonicera', 'Tracheophyta', 'Caprifolioideae', 'Caprifolium'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/20314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/20314159/medium.jpeg', 'creator': 'drshawntdash', 'creator_url': 'https://www.inaturalist.org/users/11861'}, {'foreign_identifier': 30314159, 'width': 2048, 'height': 1360, 'title': 'Ladona julia', 'raw_tags': ['Animalia', 'Arthropoda', 'Insecta', 'Odonata', 'Libellulidae', 'Anisoptera', 'Ladona', 'Pterygota', 'Hexapoda', 'Libelluloidea'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/30314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/30314159/medium.jpeg', 'creator': 'parzudak', 
'creator_url': 'https://www.inaturalist.org/users/813200'}, {'foreign_identifier': 40314159, 'width': 1024, 'height': 683, 'title': 'Leucochrysum stipitatum', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Asteraceae', 'Asterales', 'Leucochrysum', 'Tracheophyta', 'Gnaphalieae', 'Asteroideae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc-sa/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/40314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/40314159/medium.jpeg', 'creator': 'kenw', 'creator_url': 'https://www.inaturalist.org/users/1623'}, {'foreign_identifier': 60314159, 'width': 1536, 'height': 2048, 'title': 'Arion ater', 'raw_tags': ['Animalia', 'Gastropoda', 'Mollusca', 'Stylommatophora', 'Arionidae', 'Arion', 'Arionoidea', 'Arion', 'Heterobranchia', 'Euthyneura', 'Tectipleura', 'Eupulmonata', 'Helicina', 'Arionoidei'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/60314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/60314159/medium.jpeg', 'creator': 'max_hof_mann', 'creator_url': 'https://www.inaturalist.org/users/775177'}, {'foreign_identifier': 80314159, 'width': 1536, 'height': 2048, 'title': 'Insecta', 'raw_tags': ['Animalia', 'Arthropoda', 'Hexapoda'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/80314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/80314159/medium.jpg', 'creator': 'sheilacro', 'creator_url': 'https://www.inaturalist.org/users/3225310'}, {'foreign_identifier': 90314159, 'width': 1350, 'height': 900, 'title': 'Herpetogramma', 'raw_tags': ['Animalia', 'Arthropoda', 'Lepidoptera', 'Insecta', 'Pyraloidea', 'Crambidae', 'Spilomelinae', 'Pterygota', 'Hexapoda', 'Herpetogrammatini'], 'filetype': 'jpeg', 'license_url': 
'http://creativecommons.org/licenses/by-nc-nd/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/90314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/90314159/medium.jpeg', 'creator': 'natureguy', 'creator_url': 'https://www.inaturalist.org/users/134993'}, {'foreign_identifier': 110314159, 'width': 2048, 'height': 1365, 'title': 'Cladonia', 'raw_tags': ['Fungi', 'Ascomycota', 'Lecanorales', 'Cladoniaceae', 'Lecanoromycetes', 'Pezizomycotina', 'Lecanoromycetidae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/110314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/110314159/medium.jpeg', 'creator': 'sichonik', 'creator_url': 'https://www.inaturalist.org/users/3892316'}, {'foreign_identifier': 120314159, 'width': 792, 'height': 653, 'title': 'Sarcophagidae', 'raw_tags': ['Animalia', 'Arthropoda', 'Insecta', 'Diptera', 'Brachycera', 'Pterygota', 'Calyptratae', 'Oestroidea', 'Hexapoda', 'Cyclorrhapha'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/120314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/120314159/medium.jpg', 'creator': 'zealouswizard', 'creator_url': 'https://www.inaturalist.org/users/3234999'}, {'foreign_identifier': 130314159, 'width': 1024, 'height': 2048, 'title': 'Senecio vernalis', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Asteraceae', 'Asterales', 'Senecio', 'Tracheophyta', 'Senecioneae', 'Asteroideae', 'Senecioninae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/130314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/130314159/medium.jpeg', 'creator': 'solokultas', 'creator_url': 'https://www.inaturalist.org/users/1198134'}, {'foreign_identifier': 
150314159, 'width': 1536, 'height': 2048, 'title': 'Matricaria discoidea', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Asteraceae', 'Asterales', 'Matricaria', 'Tracheophyta', 'Anthemideae', 'Asteroideae', 'Matricariinae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/150314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/150314159/medium.jpeg', 'creator': 'tularosatabulator', 'creator_url': 'https://www.inaturalist.org/users/3586703'}, {'foreign_identifier': 160314159, 'width': 1536, 'height': 2048, 'title': 'Sciurus granatensis', 'raw_tags': ['Animalia', 'Chordata', 'Mammalia', 'Rodentia', 'Sciuridae', 'Sciurus', 'Sciuromorpha', 'Sciurinae', 'Sciurini', 'Vertebrata', 'Theria', 'Placentalia', 'Euarchontoglires'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/160314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/160314159/medium.jpg', 'creator': 'mp_velez87', 'creator_url': 'https://www.inaturalist.org/users/5040484'}, {'foreign_identifier': 170314159, 'width': 1536, 'height': 2048, 'title': 'Bacopa monnieri', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Lamiales', 'Plantaginaceae', 'Bacopa', 'Tracheophyta', 'Gratioleae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/170314159', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/170314159/medium.jpg', 'creator': 'calebhelsel', 'creator_url': 'https://www.inaturalist.org/users/3314770'}, {'foreign_identifier': 190995604, 'width': 2048, 'height': 1536, 'title': 'Arenaria interpres', 'raw_tags': ['Animalia', 'Chordata', 'Aves', 'Scolopacidae', 'Arenaria', 'Charadriiformes', 'Vertebrata'], 'filetype': 'jpeg', 'license_url': 
'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/190995604', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/190995604/medium.jpeg', 'creator': 'raymie', 'creator_url': 'https://www.inaturalist.org/users/765334'}, {'foreign_identifier': 190995604, 'width': 2048, 'height': 1536, 'title': 'Dreissena polymorpha', 'raw_tags': ['Animalia', 'Mollusca', 'Bivalvia', 'Myida', 'Dreissenidae', 'Dreissena', 'Euheterodonta', 'Imparidentia', 'Dreissenoidea', 'Autobranchia', 'Heteroconchia', 'Dreisseninae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/190995604', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/190995604/medium.jpeg', 'creator': 'raymie', 'creator_url': 'https://www.inaturalist.org/users/765334'}, {'foreign_identifier': 190995656, 'width': 2048, 'height': 1536, 'title': 'Dreissena polymorpha', 'raw_tags': ['Animalia', 'Mollusca', 'Bivalvia', 'Myida', 'Dreissenidae', 'Dreissena', 'Euheterodonta', 'Imparidentia', 'Dreissenoidea', 'Autobranchia', 'Heteroconchia', 'Dreisseninae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/190995656', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/190995656/medium.jpeg', 'creator': 'raymie', 'creator_url': 'https://www.inaturalist.org/users/765334'}, {'foreign_identifier': 190995656, 'width': 2048, 'height': 1536, 'title': 'Arenaria interpres', 'raw_tags': ['Animalia', 'Chordata', 'Aves', 'Scolopacidae', 'Arenaria', 'Charadriiformes', 'Vertebrata'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/190995656', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/190995656/medium.jpeg', 'creator': 'raymie', 'creator_url': 
'https://www.inaturalist.org/users/765334'}, {'foreign_identifier': 191001500, 'width': 2048, 'height': 1536, 'title': 'Pachypsylla venusta', 'raw_tags': ['Animalia', 'Arthropoda', 'Insecta', 'Hemiptera', 'Psylloidea', 'Pterygota', 'Pachypsylla', 'Sternorrhyncha', 'Hexapoda', 'Aphalaridae', 'Pachypsyllinae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191001500', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191001500/medium.jpeg', 'creator': 'cosmiccat', 'creator_url': 'https://www.inaturalist.org/users/34728'}, {'foreign_identifier': 191001500, 'width': 2048, 'height': 1536, 'title': 'Celtis laevigata', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Rosales', 'Cannabaceae', 'Celtis', 'Tracheophyta'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191001500', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191001500/medium.jpeg', 'creator': 'cosmiccat', 'creator_url': 'https://www.inaturalist.org/users/34728'}, {'foreign_identifier': 191012628, 'width': 1152, 'height': 2048, 'title': 'Pseudacris fouquettei', 'raw_tags': ['Animalia', 'Chordata', 'Amphibia', 'Anura', 'Hylidae', 'Pseudacris', 'Vertebrata', 'Acridinae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191012628', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191012628/medium.jpg', 'creator': 'kaptainkory', 'creator_url': 'https://www.inaturalist.org/users/22614'}, {'foreign_identifier': 191012628, 'width': 1152, 'height': 2048, 'title': 'Pseudacris crucifer', 'raw_tags': ['Animalia', 'Chordata', 'Amphibia', 'Anura', 'Hylidae', 'Pseudacris', 'Vertebrata', 'Acridinae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 
'foreign_landing_url': 'https://www.inaturalist.org/photos/191012628', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191012628/medium.jpg', 'creator': 'kaptainkory', 'creator_url': 'https://www.inaturalist.org/users/22614'}, {'foreign_identifier': 191015484, 'width': 2048, 'height': 1684, 'title': 'Lantana camara', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Lamiales', 'Verbenaceae', 'Lantana', 'Tracheophyta', 'Lantaneae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191015484', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191015484/medium.jpeg', 'creator': 'bushboy', 'creator_url': 'https://www.inaturalist.org/users/662724'}, {'foreign_identifier': 191015484, 'width': 2048, 'height': 1684, 'title': 'Aporiina', 'raw_tags': ['Animalia', 'Arthropoda', 'Lepidoptera', 'Insecta', 'Papilionoidea', 'Pieridae', 'Pterygota', 'Pierinae', 'Pierini', 'Hexapoda'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191015484', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191015484/medium.jpeg', 'creator': 'bushboy', 'creator_url': 'https://www.inaturalist.org/users/662724'}, {'foreign_identifier': 191015547, 'width': 2048, 'height': 1536, 'title': 'Aporiina', 'raw_tags': ['Animalia', 'Arthropoda', 'Lepidoptera', 'Insecta', 'Papilionoidea', 'Pieridae', 'Pterygota', 'Pierinae', 'Pierini', 'Hexapoda'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191015547', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191015547/medium.jpeg', 'creator': 'bushboy', 'creator_url': 'https://www.inaturalist.org/users/662724'}, {'foreign_identifier': 191015547, 'width': 2048, 'height': 1536, 'title': 'Lantana camara', 'raw_tags': 
['Magnoliopsida', 'Angiospermae', 'Plantae', 'Lamiales', 'Verbenaceae', 'Lantana', 'Tracheophyta', 'Lantaneae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191015547', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191015547/medium.jpeg', 'creator': 'bushboy', 'creator_url': 'https://www.inaturalist.org/users/662724'}, {'foreign_identifier': 191016506, 'width': 2048, 'height': 1536, 'title': 'Gambusia holbrooki', 'raw_tags': ['Animalia', 'Chordata', 'Actinopterygii', 'Cyprinodontiformes', 'Poeciliidae', 'Gambusia', 'Vertebrata', 'Poeciliinae', 'Cyprinodontoidei'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191016506', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191016506/medium.jpeg', 'creator': 'kingmush', 'creator_url': 'https://www.inaturalist.org/users/3453019'}, {'foreign_identifier': 191016506, 'width': 2048, 'height': 1536, 'title': 'Panicum repens', 'raw_tags': ['Angiospermae', 'Plantae', 'Poales', 'Liliopsida', 'Poaceae', 'Panicum', 'Tracheophyta', 'Paniceae', 'Panicoideae', 'Panicinae'], 'filetype': 'jpeg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191016506', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191016506/medium.jpeg', 'creator': 'kingmush', 'creator_url': 'https://www.inaturalist.org/users/3453019'}, {'foreign_identifier': 191018870, 'width': 1363, 'height': 2048, 'title': 'Toxicodendron diversilobum', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Sapindales', 'Anacardiaceae', 'Toxicodendron', 'Tracheophyta', 'Anacardioideae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191018870', 'url': 
'https://inaturalist-open-data.s3.amazonaws.com/photos/191018870/medium.jpg', 'creator': 'tiwane', 'creator_url': 'https://www.inaturalist.org/users/28'}, {'foreign_identifier': 191018870, 'width': 1363, 'height': 2048, 'title': 'Pedicularis densiflora', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Orobanchaceae', 'Lamiales', 'Pedicularis', 'Tracheophyta', 'Pedicularideae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191018870', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191018870/medium.jpg', 'creator': 'tiwane', 'creator_url': 'https://www.inaturalist.org/users/28'}, {'foreign_identifier': 191018903, 'width': 818, 'height': 741, 'title': 'Ranunculus occidentalis', 'raw_tags': ['Magnoliopsida', 'Angiospermae', 'Plantae', 'Ranunculus', 'Ranunculaceae', 'Ranunculales', 'Tracheophyta', 'Ranunculoideae', 'Ranunculeae'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191018903', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191018903/medium.jpg', 'creator': 'tiwane', 'creator_url': 'https://www.inaturalist.org/users/28'}, {'foreign_identifier': 191018903, 'width': 818, 'height': 741, 'title': 'Andrena', 'raw_tags': ['Animalia', 'Arthropoda', 'Insecta', 'Hymenoptera', 'Apoidea', 'Andrenidae', 'Apocrita', 'Pterygota', 'Aculeata', 'Hexapoda', 'Andreninae', 'Andrenini'], 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191018903', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191018903/medium.jpg', 'creator': 'tiwane', 'creator_url': 'https://www.inaturalist.org/users/28'}, {'foreign_identifier': 191019692, 'width': 1800, 'height': 1200, 'title': 'Mareca americana', 'raw_tags': ['Animalia', 'Chordata', 'Aves', 'Anseriformes', 
'Anatidae', 'Vertebrata', 'Mareca'], 'filetype': 'png', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191019692', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191019692/medium.png', 'creator': 'dabee', 'creator_url': 'https://www.inaturalist.org/users/4024764'}, {'foreign_identifier': 191019692, 'width': 1800, 'height': 1200, 'title': 'Anas platyrhynchos', 'raw_tags': ['Animalia', 'Chordata', 'Aves', 'Anseriformes', 'Anatidae', 'Anas', 'Vertebrata'], 'filetype': 'png', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/191019692', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/191019692/medium.png', 'creator': 'dabee', 'creator_url': 'https://www.inaturalist.org/users/4024764'}, {'foreign_identifier': 200352737, 'width': 1536, 'height': 2048, 'title': 'Plantae', 'raw_tags': None, 'filetype': 'jpg', 'license_url': 'http://creativecommons.org/licenses/by-nc/4.0/', 'foreign_landing_url': 'https://www.inaturalist.org/photos/200352737', 'url': 'https://inaturalist-open-data.s3.amazonaws.com/photos/200352737/medium.jpg', 'creator': 'sikstro1', 'creator_url': 'https://www.inaturalist.org/users/2019940'}],)] diff --git a/catalog/tests/dags/providers/provider_api_scripts/resources/metropolitan_museum_of_art/sample_additional_image_data.json b/catalog/tests/dags/providers/provider_api_scripts/resources/metropolitan_museum_of_art/sample_additional_image_data.json index 7c6ea738672..e6697550b17 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/resources/metropolitan_museum_of_art/sample_additional_image_data.json +++ b/catalog/tests/dags/providers/provider_api_scripts/resources/metropolitan_museum_of_art/sample_additional_image_data.json @@ -3,7 +3,7 @@ "creator": "Kiyohara Yukinobu", "foreign_identifier": "45734-DP251139", "foreign_landing_url": 
"https://wwwstg.metmuseum.org/art/collection/search/45734", - "image_url": "https://images.metmuseum.org/CRDImages/as/original/DP251139.jpg", + "url": "https://images.metmuseum.org/CRDImages/as/original/DP251139.jpg", "license_info": { "license": "cc0", "url": "https://creativecommons.org/publicdomain/zero/1.0/", @@ -31,7 +31,7 @@ "creator": "Kiyohara Yukinobu", "foreign_identifier": "45734-DP251138", "foreign_landing_url": "https://wwwstg.metmuseum.org/art/collection/search/45734", - "image_url": "https://images.metmuseum.org/CRDImages/as/original/DP251138.jpg", + "url": "https://images.metmuseum.org/CRDImages/as/original/DP251138.jpg", "license_info": { "license": "cc0", "url": "https://creativecommons.org/publicdomain/zero/1.0/", @@ -59,7 +59,7 @@ "creator": "Kiyohara Yukinobu", "foreign_identifier": "45734-DP251120", "foreign_landing_url": "https://wwwstg.metmuseum.org/art/collection/search/45734", - "image_url": "https://images.metmuseum.org/CRDImages/as/original/DP251120.jpg", + "url": "https://images.metmuseum.org/CRDImages/as/original/DP251120.jpg", "license_info": { "license": "cc0", "url": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/catalog/tests/dags/providers/provider_api_scripts/resources/metropolitan_museum_of_art/sample_image_data.json b/catalog/tests/dags/providers/provider_api_scripts/resources/metropolitan_museum_of_art/sample_image_data.json index 403b2680a6a..3c93e0fefc2 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/resources/metropolitan_museum_of_art/sample_image_data.json +++ b/catalog/tests/dags/providers/provider_api_scripts/resources/metropolitan_museum_of_art/sample_image_data.json @@ -3,7 +3,7 @@ "creator": "", "foreign_identifier": "47533-79_2_414b_S1_sf", "foreign_landing_url": "https://www.metmuseum.org/art/collection/search/47533", - "image_url": "https://images.metmuseum.org/CRDImages/as/original/79_2_414b_S1_sf.jpg", + "url": 
"https://images.metmuseum.org/CRDImages/as/original/79_2_414b_S1_sf.jpg", "license_info": { "license": "cc0", "url": "https://creativecommons.org/publicdomain/zero/1.0/", diff --git a/catalog/tests/dags/providers/provider_api_scripts/resources/provider_data_ingester/mock_provider_data_ingester.py b/catalog/tests/dags/providers/provider_api_scripts/resources/provider_data_ingester/mock_provider_data_ingester.py index df9480a6075..40a5a993690 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/resources/provider_data_ingester/mock_provider_data_ingester.py +++ b/catalog/tests/dags/providers/provider_api_scripts/resources/provider_data_ingester/mock_provider_data_ingester.py @@ -50,11 +50,8 @@ def get_record_data(self, record): "foreign_landing_url": record["url"], "media_type": record["media_type"], "license_info": LICENSE_INFO, + "url": record["url"], } - if record["media_type"] == "audio": - data["audio_url"] = record["audio_url"] - elif record["media_type"] == "image": - data["image_url"] = record["image_url"] return data @@ -112,22 +109,19 @@ def get_record_count_from_response(self, response_json): "id": 100, "media_type": "image", "title": "Title 100", - "image_url": "https://openaccess-cdn.clevelandart.org/1916.586.a/1916.586.a_web.jpg", # noqa: E501 - "url": "https://clevelandart.org/art/1916.586.a", + "url": "https://openaccess-cdn.clevelandart.org/1916.586.a/1916.586.a_web.jpg", # noqa: E501 }, { "id": 101, "media_type": "audio", "title": "Title 101", - "audio_url": "https://openaccess-cdn.clevelandart.org/1335.1917/1335.1917_web.jpg", # noqa: E501 - "url": "https://clevelandart.org/art/1335.1917", + "url": "https://openaccess-cdn.clevelandart.org/1335.1917/1335.1917_web.jpg", # noqa: E501 }, { "id": 102, "media_type": "image", "title": "Title 102", - "image_url": "https://openaccess-cdn.clevelandart.org/1915.534/1915.534_web.jpg", # noqa: E501 - "url": "https://clevelandart.org/art/1915.534", + "url": 
"https://openaccess-cdn.clevelandart.org/1915.534/1915.534_web.jpg", # noqa: E501 }, ] @@ -138,13 +132,13 @@ def get_record_count_from_response(self, response_json): "foreign_landing_url": "https://clevelandart.org/art/1335.1917", "media_type": "audio", "license_info": LICENSE_INFO, - "audio_url": "https://openaccess-cdn.clevelandart.org/1335.1917/1335.1917_web.jpg", # noqa: E501 + "url": "https://openaccess-cdn.clevelandart.org/1335.1917/1335.1917_web.jpg", # noqa: E501 }, { "foreign_identifier": 100, "foreign_landing_url": "https://clevelandart.org/art/1916.586.a", "media_type": "image", "license_info": LICENSE_INFO, - "image_url": "https://openaccess-cdn.clevelandart.org/1916.586.a/1916.586.a_web.jpg", # noqa: E501 + "url": "https://openaccess-cdn.clevelandart.org/1916.586.a/1916.586.a_web.jpg", # noqa: E501 }, ] diff --git a/catalog/tests/dags/providers/provider_api_scripts/resources/smk/expected_image_data_hq.json b/catalog/tests/dags/providers/provider_api_scripts/resources/smk/expected_image_data_hq.json index 66f501bb55c..86d2f8ee863 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/resources/smk/expected_image_data_hq.json +++ b/catalog/tests/dags/providers/provider_api_scripts/resources/smk/expected_image_data_hq.json @@ -3,7 +3,7 @@ "filesize": 11784886, "height": 1059, "id": "https://iip.smk.dk/iiif/jp2/KKSgb6458.tif.reconstructed.tif.jp2", - "image_url": "https://iip.smk.dk/iiif/jp2/KKSgb6458.tif.reconstructed.tif.jp2/full/!2048,/0/default.jpg", + "url": "https://iip.smk.dk/iiif/jp2/KKSgb6458.tif.reconstructed.tif.jp2/full/!2048,/0/default.jpg", "thumbnail_url": "https://iip-thumb.smk.dk/iiif/jp2/2227ms627_KKSgb6458.tif.reconstructed.tif.jp2/full/!1024,/0/default.jpg", "width": 3887 } diff --git a/catalog/tests/dags/providers/provider_api_scripts/resources/smk/expected_image_data_legacy.json b/catalog/tests/dags/providers/provider_api_scripts/resources/smk/expected_image_data_legacy.json index 7fad0db3f37..92823b0b252 100644 --- 
a/catalog/tests/dags/providers/provider_api_scripts/resources/smk/expected_image_data_legacy.json +++ b/catalog/tests/dags/providers/provider_api_scripts/resources/smk/expected_image_data_legacy.json @@ -3,7 +3,7 @@ "filesize": 11784886, "height": 1059, "id": "1170012466_object", - "image_url": "https://api.smk.dk/api/v1/thumbnail/52f00edc-936e-42a7-950b-d0cd0df3864b.jpg", + "url": "https://api.smk.dk/api/v1/thumbnail/52f00edc-936e-42a7-950b-d0cd0df3864b.jpg", "thumbnail_url": "https://api.smk.dk/api/v1/thumbnail/52f00edc-936e-42a7-950b-d0cd0df3864b.jpg", "width": 3887 } diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_brooklyn_museum.py b/catalog/tests/dags/providers/provider_api_scripts/test_brooklyn_museum.py index efc523d6afc..49110fa47d4 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_brooklyn_museum.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_brooklyn_museum.py @@ -90,7 +90,7 @@ def test_process_batch(batch_objects_name, object_data_name, expected_count): "foreign_identifier": 170425, "foreign_landing_url": "https://www.brooklynmuseum.org/opencollection/objects/90636", "height": 1152, - "image_url": "d1lfxha3ugu3d4.cloudfront.net/images/opencollection/objects/size4/CUR.66.242.29.jpg", + "url": "d1lfxha3ugu3d4.cloudfront.net/images/opencollection/objects/size4/CUR.66.242.29.jpg", "license_info": LicenseInfo( license="by", version="3.0", diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_europeana.py b/catalog/tests/dags/providers/provider_api_scripts/test_europeana.py index ac750894d49..88b1a8a033c 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_europeana.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_europeana.py @@ -117,7 +117,7 @@ def test_record_builder_get_record_data(ingester, record_builder): "foreign_landing_url": ( "http://bibliotecadigital.jcyl.es/i18n/consulta/registro.cmd?" 
"id=26229" ), - "image_url": ( + "url": ( "http://bibliotecadigital.jcyl.es/i18n/catalogo_imagenes" "/imagen_id.cmd?idImagen=102620362" ), @@ -280,7 +280,7 @@ def test_process_image_data_with_sub_provider(record_builder): assert record_data == { "foreign_landing_url": "https://wellcomecollection.org/works/zzwnbyhb", - "image_url": ( + "url": ( "https://iiif.wellcomecollection.org/image/V0013398.jpg/full/512," "/0/default.jpg" ), diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_finnish_museums.py b/catalog/tests/dags/providers/provider_api_scripts/test_finnish_museums.py index b69f8f823f7..9a29e201f9c 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_finnish_museums.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_finnish_museums.py @@ -122,7 +122,7 @@ def test_process_object_with_real_example(): ), "foreign_identifier": "sa-kuva.sa-kuva-1835", "foreign_landing_url": "https://www.finna.fi/Record/sa-kuva.sa-kuva-1835", - "image_url": "https://api.finna.fi/Cover/Show?source=Solr&id=sa-kuva.sa-kuva-1835&index=0&size=large", + "url": "https://api.finna.fi/Cover/Show?source=Solr&id=sa-kuva.sa-kuva-1835&index=0&size=large", "title": "Vuokkiniemen koulu", "source": "finnish_military_museum", "creator": "Uomala, valokuvaaja", diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_flickr.py b/catalog/tests/dags/providers/provider_api_scripts/test_flickr.py index 28a2afc8934..273e9722828 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_flickr.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_flickr.py @@ -151,9 +151,7 @@ def test_get_record_data(): expected_data = { "foreign_landing_url": "https://www.flickr.com/photos/71925535@N03/49514824541", - "image_url": ( - "https://live.staticflickr.com/65535/49514824541_35d1b4f8db" "_b.jpg" - ), + "url": ("https://live.staticflickr.com/65535/49514824541_35d1b4f8db" "_b.jpg"), "license_info": test_license_info, "foreign_identifier": 
"49514824541", "width": 1024, @@ -323,9 +321,7 @@ def test_get_record_data_with_sub_provider(): "foreign_landing_url": ( "https://www.flickr.com/photos/35067687@N04/49950595947" ), - "image_url": ( - "https://live.staticflickr.com/65535/49950595947_65a3560ddc" "_b.jpg" - ), + "url": ("https://live.staticflickr.com/65535/49950595947_65a3560ddc" "_b.jpg"), "license_info": test_license_info, "foreign_identifier": "49950595947", "width": 1024, diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_freesound.py b/catalog/tests/dags/providers/provider_api_scripts/test_freesound.py index 51476e34171..540c19b421a 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_freesound.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_freesound.py @@ -116,7 +116,7 @@ def test_get_audio_files_handles_example_audio_data(audio_data, file_size_patch) actual = fsd._get_audio_files(audio_data) expected = ( { - "audio_url": "https://freesound.org/data/previews/415/415362_6044691-hq.mp3", + "url": "https://freesound.org/data/previews/415/415362_6044691-hq.mp3", "bit_rate": 128000, "filesize": AUDIO_FILE_SIZE, "filetype": "mp3", @@ -210,7 +210,7 @@ def test_extract_audio_data_handles_example_dict(audio_data, file_size_patch): }, ], "audio_set": "https://freesound.org/apiv2/packs/23434/", - "audio_url": "https://freesound.org/data/previews/415/415362_6044691-hq.mp3", + "url": "https://freesound.org/data/previews/415/415362_6044691-hq.mp3", "bit_rate": 128000, "creator": "owly-bee", "creator_url": "https://freesound.org/people/owly-bee/", diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_jamendo.py b/catalog/tests/dags/providers/provider_api_scripts/test_jamendo.py index 6be39240797..29d4afd51e5 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_jamendo.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_jamendo.py @@ -69,7 +69,7 @@ def test_get_record_data(): actual = jamendo.get_record_data(item_data) 
expected = { "audio_set": "Opera I", - "audio_url": "https://mp3d.jamendo.com/?trackid=732&format=mp32", + "url": "https://mp3d.jamendo.com/?trackid=732&format=mp32", "category": "music", "creator": "Haeresis", "creator_url": "https://www.jamendo.com/artist/92/haeresis", diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_metropolitan_museum.py b/catalog/tests/dags/providers/provider_api_scripts/test_metropolitan_museum.py index 434db312d87..1bdf5f41d85 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_metropolitan_museum.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_metropolitan_museum.py @@ -209,7 +209,7 @@ def test_get_record_data_with_non_ok(): '{"isPublicDomain": true, "objectURL": "test.com", "primaryImage": ""}' ), None, - id="missing_image_url", + id="missing_url", ), ], ) diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_museum_victoria.py b/catalog/tests/dags/providers/provider_api_scripts/test_museum_victoria.py index af0bbb92b6e..8d6fc74fdc5 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_museum_victoria.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_museum_victoria.py @@ -68,7 +68,7 @@ def test_get_record_data(): expected_image_data = { "foreign_identifier": "media/488013", - "image_url": "https://collections.museumsvictoria.com.au/content/media/13/488013-large.jpg", + "url": "https://collections.museumsvictoria.com.au/content/media/13/488013-large.jpg", "height": 1753, "width": 3000, "creator": "", @@ -117,7 +117,7 @@ def test_get_images_success(): expected_image_data = { "creator": "Photographer: Deb Tout-Smith", "foreign_identifier": "media/329745", - "image_url": "https://collections.museumsvictoria.com.au/content/media/45/329745-large.jpg", + "url": "https://collections.museumsvictoria.com.au/content/media/45/329745-large.jpg", "license_info": get_license_info( license_url="https://creativecommons.org/licenses/by/4.0" ), diff --git 
a/catalog/tests/dags/providers/provider_api_scripts/test_nappy.py b/catalog/tests/dags/providers/provider_api_scripts/test_nappy.py index 74a531f3e3c..d39023c049b 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_nappy.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_nappy.py @@ -93,7 +93,7 @@ def test_get_should_continue(response_json, expected_result): SINGLE_ITEM, { "foreign_landing_url": "https://nappy.co/photo/9/woman-with-tattoos", - "image_url": "https://images.nappy.co/uploads/large/101591721349meykm7s6hvaswwvslpjrwibeyzru1fcxtxh0hf09cs7kdhmtptef4y3k4ua5z1bkyrbxov8tmagnafm8upwa3hxaxururtx7azaf.jpg", + "url": "https://images.nappy.co/uploads/large/101591721349meykm7s6hvaswwvslpjrwibeyzru1fcxtxh0hf09cs7kdhmtptef4y3k4ua5z1bkyrbxov8tmagnafm8upwa3hxaxururtx7azaf.jpg", "license_info": get_license_info( "https://creativecommons.org/publicdomain/zero/1.0/" ), diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_nypl.py b/catalog/tests/dags/providers/provider_api_scripts/test_nypl.py index 13adfd24fae..daf68c8a883 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_nypl.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_nypl.py @@ -120,7 +120,7 @@ def test_get_record_data_success(): "filetype": "jpeg", "foreign_identifier": "56738462", "foreign_landing_url": "http://digitalcollections.nypl.org/items/0cabe3d0-3d50-0134-a8e0-00505686a51c", - "image_url": "http://images.nypl.org/index.php?id=56738462&t=g&suffix=0cabe3d0-3d50-0134-a8e0-00505686a51c.001", + "url": "http://images.nypl.org/index.php?id=56738462&t=g&suffix=0cabe3d0-3d50-0134-a8e0-00505686a51c.001", "meta_data": { "date_issued": "1981", "genre": "Maps", diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_phylopic.py b/catalog/tests/dags/providers/provider_api_scripts/test_phylopic.py index db4513f2ed4..fb9ea81b1e1 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_phylopic.py +++ 
b/catalog/tests/dags/providers/provider_api_scripts/test_phylopic.py @@ -104,7 +104,7 @@ def test_get_record_data(): "license_info": license_info, "foreign_identifier": "5b1e88b5-159d-495d-b8cb-04f9e28d2f02", "foreign_landing_url": "https://www.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02?build=194", - "image_url": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/source.svg", + "url": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/source.svg", "title": "Hemaris tityus", "creator": "Andy Wilson", "creator_url": "https://www.phylopic.org/contributors/c3ac6939-e85a-4a10-99d1-4079537f34de?build=194", diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_rawpixel.py b/catalog/tests/dags/providers/provider_api_scripts/test_rawpixel.py index 8e78225ed90..1019dd81468 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_rawpixel.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_rawpixel.py @@ -291,7 +291,7 @@ def test_get_record_data(): "foreign_identifier": 4032668, "foreign_landing_url": "https://www.rawpixel.com/image/4032668/photo-image-background-nature-mountain", # noqa "height": 5515, - "image_url": "https://images.rawpixel.com/image_1300/cHJpdmF0ZS9sci9pbWFnZXMvd2Vic2l0ZS8yMDIyLTA1L2ZsNDY0NDU5OTQ2MjQtaW1hZ2Uta3UyY21zcjUuanBn.jpg", # noqa + "url": "https://images.rawpixel.com/image_1300/cHJpdmF0ZS9sci9pbWFnZXMvd2Vic2l0ZS8yMDIyLTA1L2ZsNDY0NDU5OTQ2MjQtaW1hZ2Uta3UyY21zcjUuanBn.jpg", # noqa "license_info": LicenseInfo( license="cc0", version="1.0", diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_science_museum.py b/catalog/tests/dags/providers/provider_api_scripts/test_science_museum.py index 8c3cfcde313..faefcf67f13 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_science_museum.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_science_museum.py @@ -123,7 +123,7 @@ def test_get_record_data_success(object_data): 
expected_image_data = { "foreign_identifier": "i4453", "foreign_landing_url": "https://collection.sciencemuseumgroup.org.uk/objects/co56202/telescope-by-galileo-replica-telescope-galilean-telescope-refracting-replica", - "image_url": "https://coimages.sciencemuseumgroup.org.uk/images/4/453/large_1923_0668__0002_.jpg", + "url": "https://coimages.sciencemuseumgroup.org.uk/images/4/453/large_1923_0668__0002_.jpg", "height": 1151, "width": 1536, "filetype": "jpeg", diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_smithsonian.py b/catalog/tests/dags/providers/provider_api_scripts/test_smithsonian.py index 990a343efed..4b6c6e8748f 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_smithsonian.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_smithsonian.py @@ -581,11 +581,11 @@ def test_extract_tags(input_is, expect_tags): [ { "foreign_identifier": "id_one", - "image_url": "https://image.url.one", + "url": "https://image.url.one", }, { "foreign_identifier": "id_two", - "image_url": "https://image.url.two", + "url": "https://image.url.two", }, ], ), @@ -611,7 +611,7 @@ def test_get_record_data(): actual_data = ingester.get_record_data(data) expected_data = [ { - "image_url": "https://collections.nmnh.si.edu/media/?irn=15814382", + "url": "https://collections.nmnh.si.edu/media/?irn=15814382", "foreign_identifier": "https://collections.nmnh.si.edu/media/?irn=15814382", "foreign_landing_url": "http://n2t.net/ark:/65665/34857ca78-9195-4156-849b-1ec47f7cd1ce", "title": "Passerculus sandwichensis nevadensis", diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_stocksnap.py b/catalog/tests/dags/providers/provider_api_scripts/test_stocksnap.py index 484fb491be1..1a326efee1b 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_stocksnap.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_stocksnap.py @@ -179,7 +179,7 @@ def test_get_record_data_handles_example_dict(filesize_mock): 
image_data = _get_resource_json("full_item.json") filesize_mock.return_value = 123456 actual_image_info = stocksnap.get_record_data(image_data) - image_url = "https://cdn.stocksnap.io/img-thumbs/960w/7VAQUG1X3B.jpg" + url = "https://cdn.stocksnap.io/img-thumbs/960w/7VAQUG1X3B.jpg" expected_image_info = { "title": "Female Fitness", "creator": "Matt Moloney", @@ -189,7 +189,7 @@ def test_get_record_data_handles_example_dict(filesize_mock): "license_info": get_license_info( license_url="https://creativecommons.org/publicdomain/zero/1.0/" ), - "image_url": image_url, + "url": url, "filesize": 123456, "filetype": "jpg", "height": 4000, diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_wikimedia_commons.py b/catalog/tests/dags/providers/provider_api_scripts/test_wikimedia_commons.py index 1a6cae1ab79..e8054fb22b2 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_wikimedia_commons.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_wikimedia_commons.py @@ -266,7 +266,7 @@ def test_get_record_data_handles_example_dict(wmc): "https://commons.wikimedia.org/w/index.php?curid=81754323" ), "foreign_identifier": 81754323, - "image_url": ( + "url": ( "https://upload.wikimedia.org/wikipedia/commons/2/25/20120925_" "PlozevetBretagne_LoneTree_DSC07971_PtrQs.jpg" ), @@ -425,7 +425,7 @@ def test_get_audio_record_data_parses_ogg_streams(wmc): actual_parsed_data = wmc.get_audio_record_data(original_data, file_metadata) expected_parsed_data = { - "audio_url": "myurl.com", + "url": "myurl.com", "bit_rate": 112000, "sample_rate": 48000, "meta_data": {"channels": 2}, @@ -439,7 +439,7 @@ def test_get_audio_record_data_parses_wav_audio_data(wmc): actual_parsed_data = wmc.get_audio_record_data(original_data, file_metadata) expected_parsed_data = { - "audio_url": "myurl.com", + "url": "myurl.com", "bit_rate": 768000, "sample_rate": 48000, "meta_data": {"channels": 1}, @@ -456,7 +456,7 @@ def 
test_get_audio_record_data_parses_wav_audio_data_missing_streams(wmc): ) actual_parsed_data = wmc.get_audio_record_data(original_data, file_metadata) expected_parsed_data = { - "audio_url": "myurl.com", + "url": "myurl.com", "meta_data": {}, } # No data is available, so nothing should be added @@ -471,7 +471,7 @@ def test_get_audio_record_data_parses_wav_invalid_bit_rate(wmc): "value" ] = 4294967294 expected_parsed_data = { - "audio_url": "myurl.com", + "url": "myurl.com", "bit_rate": None, "sample_rate": 48000, "meta_data": {"channels": 1}, diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_wordpress.py b/catalog/tests/dags/providers/provider_api_scripts/test_wordpress.py index 1b9c44e4e74..e021aaee0df 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_wordpress.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_wordpress.py @@ -93,7 +93,7 @@ def test_get_file_info(ingester): ) actual_result = ingester._get_file_info(image_details) expected_result = ( - "https://pd.w.org/2022/05/203627f31f8770f03.61535278-2048x1366.jpg", # image_url + "https://pd.w.org/2022/05/203627f31f8770f03.61535278-2048x1366.jpg", # url 1366, # height 2048, # width 544284, # filesize