Skip to content
This repository has been archived by the owner on Aug 4, 2023. It is now read-only.

Commit

Permalink
Make wikimedia script pass license_info, not license_url (#129)
Browse files Browse the repository at this point in the history
* Make wikimedia script pass license_info, not license_url

Signed-off-by: Olga Bulat <[email protected]>

* Fix import of get_license_info

Co-authored-by: Krystle Salazar <[email protected]>
  • Loading branch information
obulat and krysal authored Jul 16, 2021
1 parent bf5ebd2 commit 20772bb
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 22 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import logging
import os
from unittest.mock import patch
from unittest.mock import patch, call

import wikimedia_commons as wmc
from common.licenses.licenses import get_license_info

RESOURCES = os.path.join(
os.path.abspath(os.path.dirname(__file__)), 'tests/resources/wikimedia'
Expand Down Expand Up @@ -252,6 +253,10 @@ def test_extract_title_gets_cleaned_title():


def test_process_image_data_handles_example_dict():
"""
Converts sample json data to correct image metadata,
and calls `add_item` once for a valid image.
"""
with open(os.path.join(RESOURCES, 'image_data_example.json')) as f:
image_data = json.load(f)

Expand All @@ -261,32 +266,46 @@ def test_process_image_data_handles_example_dict():
return_value=1
) as mock_add:
wmc._process_image_data(image_data)

mock_add.assert_called_once_with(
expected_license_info = get_license_info(
license_url='https://creativecommons.org/licenses/by-sa/4.0'
)
assert mock_add.call_count == 1
assert mock_add.call_args == call(
foreign_landing_url=(
'https://commons.wikimedia.org/w/index.php?curid=81754323'),
image_url=(
'https://upload.wikimedia.org/wikipedia/commons/2/25/20120925_'
'PlozevetBretagne_LoneTree_DSC07971_PtrQs.jpg'),
license_url='https://creativecommons.org/licenses/by-sa/4.0',
license_info=expected_license_info,
foreign_identifier=81754323,
width=5514,
height=3102,
creator='PtrQs',
creator_url='https://commons.wikimedia.org/wiki/User:PtrQs',
title='20120925 PlozevetBretagne LoneTree DSC07971 PtrQs',
meta_data={'description': 'SONY DSC', 'global_usage_count': 0,
'last_modified_at_source': '2019-09-01 00:38:47',
'date_originally_created': '2012-09-25 16:23:02',
'categories': [
'Coasts of Ploz\u00e9vet', 'No QIC by usr:PtrQs',
('Photographs taken with Minolta AF Zoom '
'28-70mm F2.8 G'),
'Self-published work', 'Taken with Sony DSLR-A900',
'Trees in Finist\u00e8re']}
'last_modified_at_source': '2019-09-01 00:38:47',
'categories': [
'Coasts of Ploz\u00e9vet', 'No QIC by usr:PtrQs',
('Photographs taken with Minolta AF Zoom '
'28-70mm F2.8 G'),
'Self-published work', 'Taken with Sony DSLR-A900',
'Trees in Finist\u00e8re']}
)


def test_process_image_data_adds_example_dict():
    """
    Feed the sample record through `_process_image_data` without mocking.

    The keyword arguments built by `_process_image_data` must be accepted
    by `ImageStore.add_item` (no unexpected arguments), and afterwards the
    module-level `wmc.image_store` must report exactly one item.
    """
    example_path = os.path.join(RESOURCES, 'image_data_example.json')
    with open(example_path) as example_file:
        sample_record = json.load(example_file)

    wmc._process_image_data(sample_record)

    # NOTE(review): this reads the shared module-level store, so the count
    # presumably relies on no earlier test having added items — confirm.
    stored_count = wmc.image_store.total_items
    assert stored_count == 1


def test_process_image_data_throws_out_invalid_mediatype(monkeypatch):
image_data = {'mediatype': 'INVALID'}

Expand Down Expand Up @@ -396,14 +415,14 @@ def test_extract_creator_info_handles_link_as_partial_text():
assert expect_creator_url == actual_creator_url


def test_get_license_url_finds_license_url():
def test_get_license_info_finds_license_url():
with open(
os.path.join(RESOURCES, 'image_info_from_example_data.json')
) as f:
image_info = json.load(f)

expect_license_url = 'https://creativecommons.org/licenses/by-sa/4.0'
actual_license_url = wmc._get_license_url(image_info)
expect_license_url = 'https://creativecommons.org/licenses/by-sa/4.0/'
actual_license_url = wmc._get_license_info(image_info).url
assert actual_license_url == expect_license_url


Expand All @@ -413,7 +432,7 @@ def test_get_license_url_handles_missing_license_url():
) as f:
image_info = json.load(f)
expect_license_url = None
actual_license_url = wmc._get_license_url(image_info)
actual_license_url = wmc._get_license_info(image_info).url
assert actual_license_url == expect_license_url


Expand All @@ -423,7 +442,7 @@ def test_get_license_url_handles_cc0_license():
) as f:
image_info = json.load(f)
expect_license_url = 'https://creativecommons.org/publicdomain/zero/1.0/'
actual_license_url = wmc._get_license_url(image_info)
actual_license_url = wmc._get_license_info(image_info).url
assert actual_license_url == expect_license_url


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@
from urllib.parse import urlparse
import lxml.html as html

from common import DelayedRequester, ImageStore
from common import (
get_license_info,
DelayedRequester,
ImageStore,
)

from util.loader import provider_details as prov

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -236,8 +241,8 @@ def _process_image_data(image_data):
valid_mediatype = _check_mediatype(image_info)
if not valid_mediatype:
return None
license_url = _get_license_url(image_info)
if license_url is None:
license_info = _get_license_info(image_info)
if license_info.url is None:
return None
image_url = image_info.get('url')
creator, creator_url = _extract_creator_info(image_info)
Expand All @@ -246,7 +251,7 @@ def _process_image_data(image_data):
image_store.add_item(
foreign_landing_url=image_info.get('descriptionshorturl'),
image_url=image_url,
license_url=license_url,
license_info=license_info,
foreign_identifier=foreign_id,
width=image_info.get('width'),
height=image_info.get('height'),
Expand Down Expand Up @@ -345,7 +350,7 @@ def _extract_category_info(image_info):
return categories_list


def _get_license_url(image_info):
def _get_license_info(image_info):
license_url = (
image_info
.get('extmetadata', {})
Expand All @@ -367,7 +372,8 @@ def _get_license_url(image_info):
license_url = 'https://creativecommons.org/publicdomain/zero/1.0/'
else:
license_url = None
return license_url
license_info = get_license_info(license_url=license_url)
return license_info


def _create_meta_data_dict(image_data):
Expand Down

0 comments on commit 20772bb

Please sign in to comment.