From f6685e8dd4d9290626d35ab6605ab78c2aa16ad6 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 29 Oct 2024 00:36:45 +0100
Subject: [PATCH] #280 Update total command using QID user flow

---
 src/scribe_data/cli/total.py | 69 +++++++++++++++++++++++++++++-------
 1 file changed, 56 insertions(+), 13 deletions(-)

diff --git a/src/scribe_data/cli/total.py b/src/scribe_data/cli/total.py
index 6d7881ef..466f0c73 100644
--- a/src/scribe_data/cli/total.py
+++ b/src/scribe_data/cli/total.py
@@ -23,6 +23,7 @@
 from http.client import IncompleteRead
 from urllib.error import HTTPError
 
+import requests
 from SPARQLWrapper import JSON
 
 from scribe_data.utils import (
@@ -101,10 +102,42 @@ def get_datatype_list(language):
         return data_types
 
     else:  # return all data types
-        print("Language is not present in Scribe-Data. Checking all data types.")
         return data_type_metadata
 
 
+def check_qid_is_language(qid: str):
+    """
+    Checks that a Wikidata QID is a language and returns its English label.
+
+    Parameters
+    ----------
+        qid : str
+            The QID to look up via the Wikidata REST API.
+
+    Outputs
+    -------
+        str
+            The English label of the Wikidata language entity.
+
+    Raises
+    ------
+        ValueError
+            The QID does not exist or is not an instance of language (Q34770).
+    """
+    api_endpoint = "https://www.wikidata.org/w/rest.php/wikibase/v0"
+    request = requests.get(f"{api_endpoint}/entities/items/{qid}", timeout=5)
+    request_result = request.json()
+
+    # .get() because error payloads and items without P31 lack these keys.
+    for val in request_result.get("statements", {}).get("P31", []):
+        if val.get("value", {}).get("content") == "Q34770":
+            print(f"{request_result['labels']['en']} ({qid}) is a language.\n")
+            return request_result["labels"]["en"]
+
+    raise ValueError("The passed Wikidata QID is not a language.")
+
+
+
 # MARK: Print
 
 
@@ -125,14 +158,28 @@ def print_total_lexemes(language: str = None):
     if language is None:
         print("Returning total counts for all languages and data types...\n")
 
-    elif language.startswith("Q") and language[1:].isdigit():
-        print(f"Wikidata QID {language} passed. Checking all data types.\n")
+    elif (
+        isinstance(language, str)
+        and language.startswith("Q")
+        and language[1:].isdigit()
+    ):
+        print(
+            f"Wikidata QID {language} passed. Checking validity and then all data types."
+        )
+        language = check_qid_is_language(qid=language)
 
     else:
         print(f"Returning total counts for {language} data types...\n")
 
-    print(f"{'Language':<15} {'Data Type':<25} {'Total Wikidata Lexemes':<25}")
-    print("=" * 64)
+    def print_total_header():
+        """
+        Prints the output header and first row; language is None for all languages.
+        """
+        print(f"{'Language':<20} {'Data Type':<25} {'Total Wikidata Lexemes':<25}")
+        print("=" * 70)
+        print(
+            f"{(language or lang).capitalize():<20} {dt.replace('_', '-'): <25} {total_lexemes:<25}"
+        )
 
     if language is None:  # all languages
         languages = list_all_languages(language_metadata)
@@ -145,13 +192,11 @@ def print_total_lexemes(language: str = None):
                 total_lexemes = get_total_lexemes(lang, dt, False)
                 total_lexemes = f"{total_lexemes:,}"
                 if first_row:
-                    print(
-                        f"{lang.capitalize():<15} {dt.replace('_', '-'): <25} {total_lexemes:<25}"
-                    )
+                    print_total_header()
                     first_row = False
 
                 else:
-                    print(f"{'':<15} {dt.replace('_', ' '): <25} {total_lexemes:<25}")
+                    print(f"{'':<20} {dt.replace('_', ' '): <25} {total_lexemes:<25}")
 
             print()
 
@@ -170,13 +215,11 @@ def print_total_lexemes(language: str = None):
             total_lexemes = get_total_lexemes(language, dt, False)
             total_lexemes = f"{total_lexemes:,}"
             if first_row:
-                print(
-                    f"{language.capitalize():<15} {dt.replace('_', '-'): <25} {total_lexemes:<25}"
-                )
+                print_total_header()
                 first_row = False
 
             else:
-                print(f"{'':<15} {dt.replace('_', ' '): <25} {total_lexemes:<25}")
+                print(f"{'':<20} {dt.replace('_', ' '): <25} {total_lexemes:<25}")
 
         print()