Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Git action for project structure #378

Merged
merged 4 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/workflows/check_project_structure.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Runs check_project_structure.py on pushes to main and on pull requests
# that target main.
name: Check Project Structure
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
    # Re-run when a pull request is opened, reopened, or receives new commits.
    types: [opened, reopened, synchronize]

jobs:
  structure-check:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # The script is invoked from inside its own directory.
      - name: Run check_project_structure.py
        working-directory: ./src/scribe_data/check
        run: python check_project_structure.py

      # Executed only when an earlier step of this job has failed.
      - name: Post-run status
        if: failure()
        run: echo "Project structure check failed. Please fix the reported errors."
2 changes: 1 addition & 1 deletion .github/workflows/check_query_identifiers.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: check_query_identifiers
name: Check Query Identifiers
on:
push:
branches: [main]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr_ci.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: pr_ci
name: CI
on:
push:
branches: [main]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr_maintainer_checklist.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: pr_maintainer_checklist
name: PR Maintainer Checklist
on:
pull_request_target:
branches:
Expand Down
162 changes: 162 additions & 0 deletions src/scribe_data/check/check_project_structure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import os

# Names of the language directories that are allowed directly under BASE_DIR
# (kept in alphabetical order).
LANGUAGES = {
    "Arabic",
    "Basque",
    "Bengali",
    "Chinese",
    "Czech",
    "Danish",
    "English",
    "Esperanto",
    "Estonian",
    "Finnish",
    "French",
    "German",
    "Greek",
    "Hausa",
    "Hebrew",
    "Hindustani",
    "Indonesian",
    "Italian",
    "Japanese",
    "Korean",
    "Kurmanji",
    "Latin",
    "Malay",
    "Malayalam",
    "Norwegian",
    "Pidgin",
    "Polish",
    "Portuguese",
    "Punjabi",
    "Russian",
    "Slovak",
    "Spanish",
    "Swahili",
    "Swedish",
    "Tajik",
    "Tamil",
    "Ukrainian",
    "Yoruba",
}

# Names of the data type directories that are allowed inside a language
# (or sub-language) directory.
DATA_TYPES = {
    "adjectives",
    "adverbs",
    "articles",
    "autosuggestions",
    "conjunctions",
    "emoji_keywords",
    "nouns",
    "personal_pronouns",
    "postpositions",
    "prepositions",
    "pronouns",
    "proper_nouns",
    "verbs",
}

# Languages whose directory holds one directory per sub-language instead of
# holding the data type directories directly.
SUB_DIRECTORIES = {
    "Chinese": ["Mandarin"],
    "Hindustani": ["Urdu", "Hindi"],
    "Norwegian": ["Nynorsk", "Bokmål"],
    "Pidgin": ["Nigerian"],
    "Punjabi": ["Shahmukhi", "Gurmukhi"],
}


# Base directory path: the ``language_data_extraction`` directory that sits
# next to the directory containing this file. It is resolved from the file's
# own location rather than from the current working directory so that the
# check gives the same result no matter where it is launched from.
BASE_DIR = os.path.normpath(
    os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        os.pardir,
        "language_data_extraction",
    )
)


def _split_entries(directory):
    """Return the sorted names of the (files, directories) directly inside *directory*."""
    files, directories = [], []
    # Sorting makes the order of the reported errors independent of the
    # arbitrary order in which os.listdir returns entries.
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if os.path.isfile(path):
            files.append(name)
        elif os.path.isdir(path):
            directories.append(name)

    return files, directories


def validate_project_structure():
    """
    Validate that all directories follow the expected project structure and
    check for unexpected files and directories.

    The following problems are collected and reported:

    - any file other than ``__init__.py`` in BASE_DIR, in a language directory
      or in an expected sub-language directory,
    - a directory in BASE_DIR whose name is not in LANGUAGES,
    - for languages listed in SUB_DIRECTORIES: unexpected or missing
      sub-language directories, and directories inside a sub-language
      directory whose name is not in DATA_TYPES,
    - for all other languages: directories whose name is not in DATA_TYPES.

    A success message is printed when nothing is wrong.

    Raises
    ------
    SystemExit
        With exit code 1 when BASE_DIR is not an existing directory or when at
        least one problem was found (the problems are printed first).
    """
    # isdir rather than exists: a regular file with that name cannot be listed.
    if not os.path.isdir(BASE_DIR):
        print(f"Error: Base directory '{BASE_DIR}' does not exist.")
        # SystemExit is raised directly because the exit() helper is installed
        # by the site module for interactive use and is not always available.
        raise SystemExit(1)

    errors = []
    base_files, base_directories = _split_entries(BASE_DIR)

    # Check for unexpected files in BASE_DIR.
    errors.extend(
        f"Unexpected file found in BASE_DIR: {name}"
        for name in base_files
        if name != "__init__.py"
    )

    # Iterate through the language directories.
    for language in base_directories:
        if language not in LANGUAGES:
            errors.append(f"Unexpected language directory: {language}")
            continue

        language_path = os.path.join(BASE_DIR, language)
        language_files, language_directories = _split_entries(language_path)

        # Check for unexpected files in the language directory.
        errors.extend(
            f"Unexpected file found in {language} directory: {name}"
            for name in language_files
            if name != "__init__.py"
        )

        found_subdirs = set(language_directories)

        if language not in SUB_DIRECTORIES:
            # Regular language: every directory must be a known data type.
            if unexpected_data_types := found_subdirs - DATA_TYPES:
                errors.append(
                    f"Unexpected subdirectories in '{language}': {sorted(unexpected_data_types)}"
                )
            continue

        expected_subdirs = set(SUB_DIRECTORIES[language])

        if unexpected_subdirs := found_subdirs - expected_subdirs:
            errors.append(
                f"Unexpected sub-subdirectories in '{language}': {sorted(unexpected_subdirs)}"
            )
        if missing_subdirs := expected_subdirs - found_subdirs:
            errors.append(
                f"Missing sub-subdirectories in '{language}': {sorted(missing_subdirs)}"
            )

        # Check contents of the expected sub-subdirectories that are present.
        # Only real directories are inspected; an expected name that exists as
        # a plain file has already been reported above as an unexpected file
        # and as a missing sub-subdirectory.
        for subdir in sorted(expected_subdirs & found_subdirs):
            subdir_files, subdir_directories = _split_entries(
                os.path.join(language_path, subdir)
            )
            errors.extend(
                f"Unexpected file found in {language}/{subdir}: {name}"
                for name in subdir_files
                if name != "__init__.py"
            )
            errors.extend(
                f"Unexpected directory found in {language}/{subdir}: {name}"
                for name in subdir_directories
                if name not in DATA_TYPES
            )

    if errors:
        print("Errors found:")
        for error in errors:
            print(f" - {error}")
        raise SystemExit(1)

    print(
        "All directories and files are correctly named and organized, and no unexpected files or directories were found."
    )


# Run the validation only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    validate_project_structure()

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

"""
Generates keyword-emoji relationships from a selection of Hausa words.

Expand Down
Loading