From f6685e8dd4d9290626d35ab6605ab78c2aa16ad6 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister
Date: Tue, 29 Oct 2024 00:36:45 +0100
Subject: [PATCH] #280 Update total command using QID user flow

---
 src/scribe_data/cli/total.py | 83 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 70 insertions(+), 13 deletions(-)

diff --git a/src/scribe_data/cli/total.py b/src/scribe_data/cli/total.py
index 6d7881ef..466f0c73 100644
--- a/src/scribe_data/cli/total.py
+++ b/src/scribe_data/cli/total.py
@@ -23,6 +23,7 @@
 from http.client import IncompleteRead
 from urllib.error import HTTPError
 
+import requests
 from SPARQLWrapper import JSON
 
 from scribe_data.utils import (
@@ -101,10 +102,45 @@ def get_datatype_list(language):
         return data_types
 
     else:  # return all data types
-        print("Language is not present in Scribe-Data. Checking all data types.")
         return data_type_metadata
 
 
+def check_qid_is_language(qid: str):
+    """
+    Checks Wikidata to see if a QID is a language and returns its English label.
+
+    Parameters
+    ----------
+        qid : str
+            The QID to check Wikidata to see if it's a language and return its English label.
+
+    Outputs
+    -------
+        str
+            The English label of the Wikidata language entity.
+
+    Raises
+    ------
+        ValueError
+            An invalid QID that's not a language has been passed.
+    """
+    api_endpoint = "https://www.wikidata.org/w/rest.php/wikibase/v0"
+    request_string = f"{api_endpoint}/entities/items/{qid}"
+
+    request = requests.get(request_string, timeout=5)
+    request_result = request.json()
+
+    # A response lacking "statements" or "P31" falls through to the ValueError, not a KeyError.
+    instance_of_values = request_result.get("statements", {}).get("P31", [])
+    for val in instance_of_values:
+        # Statements without a concrete value carry no "content" and are skipped.
+        if val.get("value", {}).get("content") == "Q34770":
+            print(f"{request_result['labels']['en']} ({qid}) is a language.\n")
+            return request_result["labels"]["en"]
+
+    raise ValueError("The passed Wikidata QID is not a language.")
+
+
 # MARK: Print
 
 
@@ -125,14 +161,39 @@ def print_total_lexemes(language: str = None):
     if language is None:
         print("Returning total counts for all languages and data types...\n")
 
-    elif language.startswith("Q") and language[1:].isdigit():
-        print(f"Wikidata QID {language} passed. Checking all data types.\n")
+    elif (
+        isinstance(language, str)
+        and language.startswith("Q")
+        and language[1:].isdigit()
+    ):
+        print(
+            f"Wikidata QID {language} passed. Checking validity and then all data types."
+        )
+        language = check_qid_is_language(qid=language)
 
     else:
         print(f"Returning total counts for {language} data types...\n")
 
-    print(f"{'Language':<15} {'Data Type':<25} {'Total Wikidata Lexemes':<25}")
-    print("=" * 64)
+    def print_total_header(language, dt, total_lexemes):
+        """
+        Prints the header of the total command output followed by the first data row.
+
+        Parameters
+        ----------
+            language : str
+                The language of the first row (passed in as the outer value is None for all languages).
+
+            dt : str
+                The data type of the first row.
+
+            total_lexemes : str
+                The formatted total of lexemes of the first row.
+        """
+        print(f"{'Language':<20} {'Data Type':<25} {'Total Wikidata Lexemes':<25}")
+        print("=" * 70)
+        print(
+            f"{language.capitalize():<20} {dt.replace('_', '-'): <25} {total_lexemes:<25}"
+        )
 
     if language is None:  # all languages
         languages = list_all_languages(language_metadata)
@@ -145,13 +206,11 @@ def print_total_lexemes(language: str = None):
                 total_lexemes = get_total_lexemes(lang, dt, False)
                 total_lexemes = f"{total_lexemes:,}"
                 if first_row:
-                    print(
-                        f"{lang.capitalize():<15} {dt.replace('_', '-'): <25} {total_lexemes:<25}"
-                    )
+                    print_total_header(lang, dt, total_lexemes)
                     first_row = False
 
                 else:
-                    print(f"{'':<15} {dt.replace('_', ' '): <25} {total_lexemes:<25}")
+                    print(f"{'':<20} {dt.replace('_', ' '): <25} {total_lexemes:<25}")
 
             print()
 
@@ -170,13 +229,11 @@ def print_total_lexemes(language: str = None):
             total_lexemes = get_total_lexemes(language, dt, False)
             total_lexemes = f"{total_lexemes:,}"
             if first_row:
-                print(
-                    f"{language.capitalize():<15} {dt.replace('_', '-'): <25} {total_lexemes:<25}"
-                )
+                print_total_header(language, dt, total_lexemes)
                 first_row = False
 
             else:
-                print(f"{'':<15} {dt.replace('_', ' '): <25} {total_lexemes:<25}")
+                print(f"{'':<20} {dt.replace('_', ' '): <25} {total_lexemes:<25}")
 
         print()
 