Merge branch 'main' into migration

scribe-org · Nov 8, 2024 · b72c928 · b72c928
2 parents 02df986 + 647d1d9
commit b72c928
Show file tree

Hide file tree

Showing 14 changed files with 621 additions and 124 deletions.
diff --git a/.github/workflows/python_package_ci.yaml b/.github/workflows/python_package_ci.yaml
@@ -38,7 +38,7 @@ jobs:
         run: |
           brew bundle install --file=Brewfile
           # configure PATH & PKG_CONFIG_PATH as per
-          # https://gitlab.pyicu.org/main/pyicu#installing-pyicu
+          # https://gitlab.pyicu.org/main/pyicu
           echo "/opt/homebrew/opt/icu4c/bin:/opt/homebrew/opt/icu4c/sbin:$PATH" >> $GITHUB_PATH
           echo "PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/opt/homebrew/opt/icu4c/lib/pkgconfig" >> $GITHUB_ENV
 

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -13,3 +13,9 @@ repos:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix]
       - id: ruff-format
+
+  - repo: https://github.com/tcort/markdown-link-check
+    rev: v3.13.6
+    hooks:
+      - id: markdown-link-check
+        args: [-q]
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -9,7 +9,7 @@
     :target: https://github.com/scribe-org/Scribe-Data
 
 .. |rtd| image:: https://img.shields.io/readthedocs/scribe-data.svg?label=%20&logo=read-the-docs&logoColor=ffffff
-    :target: http://scribe-datareadthedocs.io/en/latest/
+    :target: https://scribe-data.readthedocs.io/en/latest/
 
 .. |issues| image:: https://img.shields.io/github/issues/scribe-org/Scribe-Data?label=%20&logo=github
     :target: https://github.com/scribe-org/Scribe-Data/issues

diff --git a/docs/source/scribe_data/cli.rst b/docs/source/scribe_data/cli.rst
@@ -143,14 +143,31 @@ Options:
 - ``-ot, --output-type {json,csv,tsv}``: The output file type.
 - ``-ope, --outputs-per-entry OUTPUTS_PER_ENTRY``: How many outputs should be generated per data entry.
 - ``-o, --overwrite``: Whether to overwrite existing files (default: False).
-- ``-a, --all ALL``: Get all languages and data types.
+- ``-a, --all``: Get all languages and data types. Can be combined with ``-dt`` to get all languages for a specific data type, or with ``-lang`` to get all data types for a specific language.
 - ``-i, --interactive``: Run in interactive mode.
 
-Example:
+Examples:
+
+.. code-block:: bash
+
+    $ scribe-data get --all
+    Getting data for all languages and all data types...
+
+.. code-block:: bash
+
+    $ scribe-data get --all -dt nouns
+    Getting all nouns for all languages...
+
+.. code-block:: bash
+
+    $ scribe-data get --all -lang English
+    Getting all data types for English...
 
 .. code-block:: bash
 
     $ scribe-data get -l English --data-type verbs -od ~/path/for/output
+    Getting and formatting English verbs
+    Data updated: 100%|████████████████████████| 1/1 [00:XY<00:00, XY.Zs/process]
 
 Behavior and Output:
 ^^^^^^^^^^^^^^^^^^^^
@@ -180,7 +197,7 @@ Behavior and Output:
     .. code-block:: text
 
         Getting and formatting English verbs
-        Data updated: 100%|████████████████████████| 1/1 [00:29<00:00, 29.73s/process]
+        Data updated: 100%|████████████████████████| 1/1 [00:XY<00:00, XY.Zs/process]
 
 4. If no data is found, you'll see a warning:
 
@@ -242,30 +259,63 @@ Usage:
 Options:
 ^^^^^^^^
 
-- ``-lang, --language LANGUAGE``: The language(s) to check totals for.
+- ``-lang, --language LANGUAGE``: The language(s) to check totals for. Can be a language name or QID.
 - ``-dt, --data-type DATA_TYPE``: The data type(s) to check totals for.
-- ``-a, --all ALL``: Get totals for all languages and data types.
+- ``-a, --all``: Get totals for all languages and data types.
 
 Examples:
 
 .. code-block:: text
 
-    $scribe-data total -dt nouns  # verbs, adjectives, etc
-    Data type: nouns
-    Total number of lexemes: 123456
+    $ scribe-data total --all
+    Total lexemes for all languages and data types:
+    ==============================================
+    Language     Data Type     Total Lexemes
+    ==============================================
+    English      nouns         123,456
+                 verbs         234,567
+    ...
 
 .. code-block:: text
 
-    $scribe-data total -lang English
-    Language: English
-    Total number of lexemes: 123456
+    $ scribe-data total --language English
+    Returning total counts for English data types...
+
+    Language        Data Type                 Total Wikidata Lexemes
+    ================================================================
+    English         adjectives                12,345
+                    adverbs                   23,456
+                    nouns                     34,567
+    ...
 
 .. code-block:: text
 
-    $scribe-data total -lang English -dt nouns  # verbs, adjectives, etc
+    $ scribe-data total --language Q1860
+    Wikidata QID Q1860 passed. Checking all data types.
+
+    Language        Data Type                 Total Wikidata Lexemes
+    ================================================================
+    Q1860           adjectives                12,345
+                    adverbs                   23,456
+                    articles                  30
+                    conjunctions              40
+                    nouns                     56,789
+                    personal pronouns         60
+    ...
+
+.. code-block:: text
+
+    $ scribe-data total --language English -dt nouns
     Language: English
     Data type: nouns
-    Total number of lexemes: 12345
+    Total number of lexemes: 12,345
+
+.. code-block:: text
+
+    $ scribe-data total --language Q1860 -dt verbs
+    Language: Q1860
+    Data type: verbs
+    Total number of lexemes: 23,456
 
 Convert Command
 ~~~~~~~~~~~~~~~

diff --git a/docs/source/scribe_data/load/index.rst b/docs/source/scribe_data/load/index.rst
@@ -3,11 +3,6 @@ load/
 
 `View code on GitHub <https://github.com/scribe-org/Scribe-Data/tree/main/src/scribe_data/load>`_
 
-.. toctree::
-    :maxdepth: 2
-
-    update_files/index
-
 .. toctree::
     :maxdepth: 1
 

diff --git a/docs/source/scribe_data/load/update_files/index.rst b/docs/source/scribe_data/load/update_files/index.rst
diff --git a/docs/source/scribe_data/unicode/index.rst b/docs/source/scribe_data/unicode/index.rst
@@ -5,7 +5,7 @@ unicode/
 
 The Scribe-Data Unicode process is powered by `cldr-json <https://github.com/unicode-org/cldr-json>`_ data from the `Unicode Consortium <https://home.unicode.org/>`_ and `PyICU <https://gitlab.pyicu.org/main/pyicu>`_, a Python extension that wraps the Unicode Consortium's `International Components for Unicode (ICU) <https://github.com/unicode-org/icu>`_ C++ project.
 
-Please see the `installation guide for PyICU <https://gitlab.pyicu.org/main/pyicu#installing-pyicu>`_ as the extension must be linked to ICU on your machine to work properly.
+Please see the `installation guide for PyICU <https://gitlab.pyicu.org/main/pyicu>`_ as the extension must be linked to ICU on your machine to work properly.
 
 .. toctree::
     :maxdepth: 1

diff --git a/src/scribe_data/check/check_pyicu.py b/src/scribe_data/check/check_pyicu.py
@@ -57,7 +57,7 @@ def get_python_version_and_architecture():
 
 def fetch_wheel_releases():
     """
-    Fetch the release data for PyICU from GitHub.
+    Fetch the release data for PyICU from GitHub with error handling for rate limits.
 
     Returns
     -------
-Original file line number
+Diff line change
@@ -57,7 +57,7 @@ def get_python_version_and_architecture():
     def fetch_wheel_releases():
         """
-        Fetch the release data for PyICU from GitHub.
+        Fetch the release data for PyICU from GitHub with error handling for rate limits.
         Returns
         -------
@@ Expand Down @@