diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2e44c618..c0ba3645 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -287,6 +287,33 @@ Scribe does not accept direct edits to the grammar JSON files as they are source The documentation for Scribe-Data can be found at [scribe-data.readthedocs.io](https://scribe-data.readthedocs.io/en/latest/). Documentation is an invaluable way to contribute to coding projects as it allows others to more easily understand the project structure and contribute. Issues related to documentation are marked with the [`documentation`](https://github.com/scribe-org/Scribe-Data/labels/documentation) label. +### Function Docstrings + +Scribe-Data generally follows [NumPy conventions](https://numpydoc.readthedocs.io/en/latest/format.html) for documenting functions and Python code in general. Function docstrings should have the following format: + +```py +def example_function(argument: argument_type) -> return_type: + """ + An example docstring for a function so others understand your work. + + Parameters + ---------- + argument : argument_type + Description of your argument. + + Returns + ------- + return_value : return_type + Description of your return value. + """ + + ... + + return return_value +``` + +### Building the Docs + Use the following commands to build the documentation locally: ```bash diff --git a/src/scribe_data/cli/download.py b/src/scribe_data/cli/download.py index ecb82dea..b1d3db9f 100644 --- a/src/scribe_data/cli/download.py +++ b/src/scribe_data/cli/download.py @@ -35,11 +35,16 @@ def download_wrapper( wikidata_dump: Optional[str] = None, output_dir: Optional[str] = None ) -> None: - """Download Wikidata dumps. + """ + Download Wikidata lexeme dumps given user preferences. + + Parameters + ---------- + wikidata_dump : str + Optional date string in YYYYMMDD format for specific dumps. 
- Args: - wikidata_dump: Optional date string in YYYYMMDD format for specific dumps - output_dir: Optional directory path for the downloaded file. Defaults to 'scribe_data_wikidumps' directory + output_dir : str + Optional directory path for the downloaded file. Defaults to 'scribe_data_wikidumps' directory. """ dump_url = download_wiki_lexeme_dump(wikidata_dump or "latest-lexemes") diff --git a/src/scribe_data/wikidata/wikidata_utils.py b/src/scribe_data/wikidata/wikidata_utils.py index 0fbd0d8a..3afb79a0 100644 --- a/src/scribe_data/wikidata/wikidata_utils.py +++ b/src/scribe_data/wikidata/wikidata_utils.py @@ -40,12 +40,15 @@ def parse_date(date_string): - YYYY/MM/DD - YYYY-MM-DD - Args: - date_string (str): The date string to be parsed. - - Returns: - datetime.date: Parsed date object if the format is valid. - None: If the date format is invalid. + Parameters + ---------- + date_string : str + The date string to be parsed. + + Returns + ------- + datetime.date : Parsed date object if the format is valid. + None : If the date format is invalid. """ formats = ["%Y%m%d", "%Y/%m/%d", "%Y-%m-%d"] for fmt in formats: @@ -63,14 +66,21 @@ def available_closest_lexeme_dumpfile(target_entity, other_old_dumps, try_old_du """ Finds the closest available dump file based on the target date. - Args: - target_entity (str): The target date for which the dump is requested (format: YYYY/MM/DD or similar). - other_old_dumps (list): List of available dump folders as strings. - try_old_dump (function): A function to validate if the dump file exists. + Parameters + ---------- + target_entity : str + The target date for which the dump is requested (format: YYYY/MM/DD or similar). + + other_old_dumps : list + List of available dump folders as strings. - Returns: - str: The closest available dump file date (as a string). - None: If no suitable dump is found. + try_old_dump : function + A function to validate if the dump file exists. 
+ + Returns + ------- + str : The closest available dump file date (as a string). + None : If no suitable dump is found. """ available_dates = [] target_date = parse_date(target_entity) @@ -102,14 +112,18 @@ def download_wiki_lexeme_dump(target_entity="latest-lexemes"): """ Downloads a Wikimedia lexeme dump based on the specified target entity or date. - Args: - target_entity (str, optional): The target dump to download. Defaults to "latest-lexemes". + Parameters + ---------- + target_entity : str, optional + The target dump to download. Defaults to "latest-lexemes". + - If "latest-lexemes", downloads the latest dump. - If a valid date (e.g., YYYYMMDD), attempts to download the dump for that date. - Returns: - str: The URL of the requested or closest available dump. - None: If no suitable dump is found or the request fails. + Returns + ------- + str : The URL of the requested or closest available dump. + None : If no suitable dump is found or the request fails. """ base_url = "https://dumps.wikimedia.org/wikidatawiki/entities" @@ -117,12 +131,15 @@ def try_old_dump(target_entity): """ Checks if the specified dump file exists for a target entity. - Args: - target_entity (str): The target entity or date folder to check. + Parameters + ---------- + target_entity : str + The target entity or date folder to check. - Returns: - str: The URL of the dump file if it exists. - None: If the dump file does not exist. + Returns + ------- + str : The URL of the dump file if it exists. + None : If the dump file does not exist. """ entity_url = f"{base_url}/{target_entity}/" entity_response = requests.get(entity_url)