diff --git a/.cookiecutter.json b/.cookiecutter.json
new file mode 100644
index 00000000..e3885bf8
--- /dev/null
+++ b/.cookiecutter.json
@@ -0,0 +1,22 @@
+{
+    "_copy_without_render": [
+        "*.rst_t"
+    ],
+    "_jinja2_env_vars": {
+        "lstrip_blocks": true,
+        "trim_blocks": true
+    },
+    "_template": "https://github.com/statisticsnorway/ssb-pypitemplate.git",
+    "author": "Miles Winther",
+    "code_quality_level": "High",
+    "copyright_owner": "Statistics Norway",
+    "copyright_year": "2022",
+    "development_status": "Development Status :: 4 - Beta",
+    "email": "mmw@ssb.no",
+    "friendly_name": "Datadoc",
+    "github_organization": "statisticsnorway",
+    "license": "MIT",
+    "package_name": "datadoc",
+    "project_name": "datadoc",
+    "version": "0.4.0"
+}
diff --git a/.cruft.json b/.cruft.json
new file mode 100644
index 00000000..b90f66a1
--- /dev/null
+++ b/.cruft.json
@@ -0,0 +1,30 @@
+{
+    "template": "https://github.com/statisticsnorway/ssb-pypitemplate.git",
+    "commit": "ade308e2279e8de90c4ba67d8148e984b3465d36",
+    "checkout": "2023.12.11",
+    "context": {
+        "cookiecutter": {
+            "project_name": "datadoc",
+            "package_name": "datadoc",
+            "friendly_name": "Datadoc",
+            "copyright_owner": "Statistics Norway",
+            "copyright_year": "2022",
+            "author": "Miles Winther",
+            "email": "mmw@ssb.no",
+            "github_organization": "statisticsnorway",
+            "version": "0.4.0",
+            "license": "MIT",
+            "development_status": "Development Status :: 4 - Beta",
+            "code_quality_level": "High",
+            "_copy_without_render": [
+                "*.rst_t"
+            ],
+            "_jinja2_env_vars": {
+                "lstrip_blocks": true,
+                "trim_blocks": true
+            },
+            "_template": "https://github.com/statisticsnorway/ssb-pypitemplate.git"
+        }
+    },
+    "directory": null
+}
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 00000000..a8faee78
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,15 @@
+root = true
+
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[*.{py,toml}]
+indent_style = space
+indent_size = 4
+
+[*.{yml,yaml,json}]
+indent_style = space
+indent_size = 2
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..78828b24
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,32 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "monthly"
+    open-pull-requests-limit: 99
+    groups:
+      github-action-dependencies:
+        patterns:
+          - "*"
+  - package-ecosystem: "pip"
+    directory: "/.github/workflows"
+    schedule:
+      interval: "monthly"
+    open-pull-requests-limit: 99
+    groups:
+      workflows-dependencies:
+        patterns:
+          - "*"
+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "monthly"
+    versioning-strategy: lockfile-only
+    allow:
+      - dependency-type: "all"
+    open-pull-requests-limit: 99
+    groups:
+      poetry-dependencies:
+        patterns:
+          - "*"
diff --git a/.github/labels.yml b/.github/labels.yml
new file mode 100644
index 00000000..7388c8ec
--- /dev/null
+++ b/.github/labels.yml
@@ -0,0 +1,66 @@
+---
+# Label names are important, as Release Drafter uses them to decide
+# where to record each change in the changelog, or whether to skip it.
+#
+# The repository labels will be automatically configured using this file and
+# the GitHub Action https://github.com/marketplace/actions/github-labeler.
+- name: breaking + description: Breaking Changes + color: bfd4f2 +- name: bug + description: Something isn't working + color: d73a4a +- name: ci + description: Continuous Integration + color: 4a97d6 +- name: dependencies + description: Pull requests that update a dependency file + color: 0366d6 +- name: documentation + description: Improvements or additions to documentation + color: 0075ca +- name: duplicate + description: This issue or pull request already exists + color: cfd3d7 +- name: enhancement + description: New feature or request + color: a2eeef +- name: github_actions + description: Pull requests that update Github_actions code + color: "000000" +- name: good first issue + description: Good for newcomers + color: 7057ff +- name: help wanted + description: Extra attention is needed + color: 008672 +- name: invalid + description: This doesn't seem right + color: e4e669 +- name: performance + description: Performance + color: "016175" +- name: python + description: Pull requests that update Python code + color: 2b67c6 +- name: question + description: Further information is requested + color: d876e3 +- name: refactoring + description: Refactoring + color: ef67c4 +- name: removal + description: Removals and Deprecations + color: 9ae7ea +- name: style + description: Style + color: c120e5 +- name: testing + description: Testing + color: b1fc6f +- name: wontfix + description: This will not be worked on + color: ffffff +- name: "skip-changelog" + description: Changes that should be omitted from the release notes + color: ededed diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml new file mode 100644 index 00000000..2e77836a --- /dev/null +++ b/.github/release-drafter.yml @@ -0,0 +1,30 @@ +categories: + - title: ":boom: Breaking Changes" + label: "breaking" + - title: ":rocket: Features" + label: "enhancement" + - title: ":fire: Removals and Deprecations" + label: "removal" + - title: ":beetle: Fixes" + label: "bug" + - title: ":racehorse: Performance" + label: "performance" + - title: ":rotating_light: Testing" + label: "testing" + - title: ":construction_worker: Continuous Integration" + label: "ci" + - title: ":books: Documentation" + label: "documentation" + - title: ":hammer: Refactoring" + label: "refactoring" + - title: ":lipstick: Style" + label: "style" + - title: ":package: Dependencies" + labels: + - "dependencies" +exclude-labels: + - "skip-changelog" +template: | + ## Changes + + $CHANGES diff --git a/.github/workflows/constraints.txt b/.github/workflows/constraints.txt new file mode 100644 index 00000000..37bb30b6 --- /dev/null +++ b/.github/workflows/constraints.txt @@ -0,0 +1,5 @@ +pip==23.3.1 +nox==2023.4.22 +nox-poetry==1.0.3 +poetry==1.7.1 +virtualenv==20.25.0 diff --git a/.github/workflows/dapla-lab-ci.yml b/.github/workflows/docker-build.yml similarity index 90% rename from .github/workflows/dapla-lab-ci.yml rename to .github/workflows/docker-build.yml index 0c225f68..079aca5f 100644 --- a/.github/workflows/dapla-lab-ci.yml +++ b/.github/workflows/docker-build.yml @@ -1,4 +1,4 @@ -name: Datadoc Dapla Lab CI +name: Docker build on: push: @@ -6,15 +6,14 @@ on: - "**" tags: - "*" - pull_request: paths: - - 'datadoc/**' + - 'src/datadoc/**' - 'poetry.lock' - 'Dockerfile' - - '.github/workflows/dapla-lab-ci.yml' + - '.github/workflows/docker-build.yml' env: - REGISTRY: europe-north1-docker.pkg.dev/artifact-registry-5n/dapla-lab-docker/onyxia + REGISTRY: europe-north1-docker.pkg.dev/artifact-registry-5n/dapla-metadata-docker/datadoc IMAGE: datadoc TAG: ${{ 
github.ref_name }}-${{ github.sha }} @@ -31,7 +30,7 @@ jobs: uses: "google-github-actions/auth@v1.1.1" with: workload_identity_provider: "projects/848539402404/locations/global/workloadIdentityPools/gh-actions/providers/gh-actions" - service_account: "gh-actions-dapla-lab@artifact-registry-5n.iam.gserviceaccount.com" + service_account: "gh-actions-dapla-metadata@artifact-registry-5n.iam.gserviceaccount.com" token_format: "access_token" - name: Set up QEMU uses: docker/setup-qemu-action@v2 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..24cd0aa6 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,67 @@ +name: Documentation + +on: + push: + branches: + - main + - master + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Check out the repository + uses: actions/checkout@v4 + + - name: Install Poetry + run: | + pipx install --pip-args "-c .github/workflows/constraints.txt" poetry + poetry --version + + - name: Set up Python + uses: actions/setup-python@v5.0.0 + with: + python-version: "3.11" + cache: "poetry" + + - name: Install dependencies + run: | + poetry install --no-root + + - name: Build doc with Sphinx + run: | + poetry run sphinx-build -W docs docs/_build + + - name: Upload artifact + uses: actions/upload-pages-artifact@v2 + with: + path: "docs/_build" + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Setup Pages + uses: actions/configure-pages@v4 + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v3 diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml new file mode 100644 index 00000000..de14cc5f --- /dev/null +++ b/.github/workflows/labeler.yml @@ -0,0 +1,25 @@ +name: Labeler + +on: + push: + branches: + - main + - master + paths: + - ".github/labels.yml" + - ".github/workflows/labeler.yml" + +jobs: + labeler: + runs-on: ubuntu-latest + permissions: + issues: write + contents: read + steps: + - name: Check out the repository + uses: actions/checkout@v4 + + - name: Run Labeler + uses: crazy-max/ghaction-github-labeler@v5.0.0 + with: + skip-delete: true diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml deleted file mode 100644 index d437cad7..00000000 --- a/.github/workflows/publish.yml +++ /dev/null @@ -1,16 +0,0 @@ -name: Publish package to PyPI -on: - push: - tags: - - "v*.*.*" -jobs: - build: - runs-on: ubuntu-latest - environment: PyPI - steps: - - uses: actions/checkout@v2 - - name: Build and publish to pypi - uses: JRubics/poetry-publish@v1.12 - with: - pypi_token: ${{ secrets.PYPI_TOKEN }} - ignore_dev_requirements: "yes" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..5bc9eb26 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,78 @@ +name: Release + +on: + push: + branches: + - main + - master + +jobs: + release: + name: Release + runs-on: ubuntu-latest + 
permissions: + id-token: write + contents: write + pull-requests: read + steps: + - name: Check out the repository + uses: actions/checkout@v4 + with: + fetch-depth: 2 + + - name: Set up Python + uses: actions/setup-python@v5.0.0 + with: + python-version: "3.11" + + - name: Upgrade pip + run: | + pip install -c .github/workflows/constraints.txt pip + pip --version + + - name: Install Poetry + run: | + pip install -c .github/workflows/constraints.txt poetry + poetry --version + + - name: Check if there is a parent commit + id: check-parent-commit + run: | + echo "sha=$(git rev-parse --verify --quiet HEAD^)" >> $GITHUB_OUTPUT + + - name: Detect and tag new version + id: check-version + if: steps.check-parent-commit.outputs.sha + uses: salsify/action-detect-and-tag-new-version@v2.0.3 + with: + version-command: | + bash -o pipefail -c "poetry version | cut -f 2 -d' '" + + - name: Bump version for developmental release + if: "! steps.check-version.outputs.tag" + run: | + poetry version patch && + version=$(poetry version | awk '{ print $2 }') && + poetry version $version.dev.$(date +%s) + + - name: Build package + run: | + poetry build --ansi + + - name: Publish package on PyPI + if: steps.check-version.outputs.tag + uses: pypa/gh-action-pypi-publish@v1.8.11 + + - name: Publish package on TestPyPI + if: "! steps.check-version.outputs.tag" + uses: pypa/gh-action-pypi-publish@v1.8.11 + with: + repository-url: https://test.pypi.org/legacy/ + + - name: Publish the release notes + uses: release-drafter/release-drafter@v5.25.0 + with: + publish: ${{ steps.check-version.outputs.tag != '' }} + tag: ${{ steps.check-version.outputs.tag }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/scheduled-update.yml b/.github/workflows/scheduled-update.yml deleted file mode 100644 index 1f489924..00000000 --- a/.github/workflows/scheduled-update.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: Update dependencies - -on: - schedule: - # 07:00 on Fridays - - cron: "0 7 * * Fri" - # Allow manual triggering - workflow_dispatch: - -jobs: - autoupdate_dependencies: - runs-on: ubuntu-latest - steps: - - name: Generate a token - id: generate_token - uses: tibdex/github-app-token@b62528385c34dbc9f38e5f4225ac829252d1ea92 - with: - app_id: ${{ secrets.DEPENDENCY_UPDATER_PYTHON_ID }} - private_key: ${{ secrets.DEPENDENCY_UPDATER_PYTHON_PRIVATE_KEY }} - - uses: actions/checkout@v3 - with: - token: ${{ steps.generate_token.outputs.token }} - - uses: actions/setup-python@v2 - with: - python-version: "3.11" - - uses: Gr1N/setup-poetry@v8 - - uses: actions/cache@v2 - with: - path: ~/.cache/pypoetry/virtualenvs - key: ${{ runner.os }}-poetry-${{ hashFiles('poetry.lock') }} - - name: Print Poetry version - shell: bash - run: poetry --version - - name: Install poetry relax plugin - shell: bash - run: poetry self add poetry-relax - - name: Print files in folder - shell: bash - run: ls - - name: Run autoupdate - shell: bash - env: - update_command: "poetry relax --update" - default_branch: ${{ github.event.repository.default_branch }} - update_path: "" # Set to run in a specific path with run update_command - on_changes_command: "" # Command to run when changes are detected - run: | - .github/workflows/update-dependencies.sh \ - ${{ steps.generate_token.outputs.token }} \ - "${update_command}" \ - "${default_branch}" \ - "${update_path}" \ - "${on_changes_command}" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..cacc02f5 --- /dev/null +++ 
b/.github/workflows/tests.yml @@ -0,0 +1,167 @@ +name: Tests + +on: + push: + branches: + - main + - master + pull_request: + +jobs: + tests: + name: ${{ matrix.session }} ${{ matrix.python }} / ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - { python: "3.11", os: "ubuntu-latest", session: "pre-commit" } + - { python: "3.11", os: "ubuntu-latest", session: "mypy" } + - { python: "3.10", os: "ubuntu-latest", session: "mypy" } + - { python: "3.12", os: "ubuntu-latest", session: "mypy" } + - { python: "3.11", os: "ubuntu-latest", session: "tests" } + - { python: "3.10", os: "ubuntu-latest", session: "tests" } + - { python: "3.12", os: "ubuntu-latest", session: "tests" } + - { python: "3.11", os: "windows-latest", session: "tests" } + - { python: "3.11", os: "macos-latest", session: "tests" } + - { python: "3.11", os: "ubuntu-latest", session: "typeguard" } + - { python: "3.11", os: "ubuntu-latest", session: "xdoctest" } + - { python: "3.11", os: "ubuntu-latest", session: "docs-build" } + + env: + NOXSESSION: ${{ matrix.session }} + FORCE_COLOR: "1" + PRE_COMMIT_COLOR: "always" + + steps: + - name: Check out the repository + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v5.0.0 + with: + python-version: ${{ matrix.python }} + + - name: Upgrade pip + run: | + pip install -c .github/workflows/constraints.txt pip + pip --version + + - name: Upgrade pip in virtual environments + shell: python + run: | + import os + import pip + + with open(os.environ["GITHUB_ENV"], mode="a") as io: + print(f"VIRTUALENV_PIP={pip.__version__}", file=io) + + - name: Install Poetry + run: | + pipx install --pip-args "-c .github/workflows/constraints.txt" poetry + poetry --version + + - name: Install Nox + run: | + pipx install --pip-args "-c .github/workflows/constraints.txt" nox + pipx inject --pip-args "-c .github/workflows/constraints.txt" nox nox-poetry + nox --version + + - name: Compute pre-commit cache key + if: matrix.session == 'pre-commit' + id: pre-commit-cache + shell: python + run: | + import hashlib + import subprocess + import sys + + python = "py{}.{}".format(*sys.version_info[:2]) + payload = sys.version.encode() + sys.executable.encode() + digest = hashlib.sha256(payload).hexdigest() + result = "${{ runner.os }}-{}-{}-pre-commit".format(python, digest[:8]) + cmd = f'echo "result={result}" >> $GITHUB_OUTPUT' + subprocess.run(cmd, shell=True) + + - name: Restore pre-commit cache + uses: actions/cache@v3 + if: matrix.session == 'pre-commit' + with: + path: ~/.cache/pre-commit + key: ${{ steps.pre-commit-cache.outputs.result }}-${{ hashFiles('.pre-commit-config.yaml') }} + restore-keys: | + ${{ steps.pre-commit-cache.outputs.result }}- + + - name: Run Nox + run: | + nox --python=${{ matrix.python }} + + - name: Upload coverage data + if: always() && matrix.session == 'tests' + uses: "actions/upload-artifact@v3" + with: + name: coverage-data + path: ".coverage.*" + + - name: Upload documentation + if: matrix.session == 'docs-build' + uses: actions/upload-artifact@v3 + with: + name: docs + path: docs/_build + + coverage: + runs-on: ubuntu-latest + needs: tests + steps: + - name: Check out the repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis + + - name: Set up Python + uses: actions/setup-python@v5.0.0 + with: + python-version: "3.11" + + - name: Upgrade pip + run: | + pip install -c .github/workflows/constraints.txt 
pip + pip --version + + - name: Install Poetry + run: | + pipx install --pip-args "-c .github/workflows/constraints.txt" poetry + poetry --version + + - name: Install Nox + run: | + pipx install --pip-args "-c .github/workflows/constraints.txt" nox + pipx inject --pip-args "-c .github/workflows/constraints.txt" nox nox-poetry + nox --version + + - name: Download coverage data + uses: actions/download-artifact@v3 + with: + name: coverage-data + + - name: Combine coverage data and display human readable report + run: | + nox --session=coverage + + - name: Create coverage report + run: | + nox --session=coverage -- xml + + # Need to fix coverage source paths for SonarCloud scanning in GitHub actions. + # Replace root path with /github/workspace (mounted in docker container). + - name: Override coverage source paths for SonarCloud + run: sed -i "s/\/home\/runner\/work\/datadoc\/datadoc/\/github\/workspace/g" coverage.xml + + - name: SonarCloud Scan + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} + # No need to run SonarCloud analysis if dependabot update or token not defined + if: env.SONAR_TOKEN != '' && (github.actor != 'dependabot[bot]') + uses: SonarSource/sonarcloud-github-action@v2.1.0 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml deleted file mode 100644 index e6793661..00000000 --- a/.github/workflows/unit-tests.yml +++ /dev/null @@ -1,60 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Datadoc Unit tests - -on: - pull_request: - branches: - - "*" - -permissions: - contents: read - issues: write - pull-requests: write - -jobs: - run-unit-tests: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v1 - with: - python-version: "3.11" - - uses: Gr1N/setup-poetry@v8 - - uses: actions/cache@v2 - with: - path: ~/.cache/pypoetry/virtualenvs - key: ${{ runner.os }}-poetry-${{ hashFiles('poetry.lock') }} - - name: Print Poetry version - run: poetry --version - - name: Install dependencies - run: | - poetry install --all-extras - - name: Commit hooks - run: | - poetry run pre-commit run --all-files - - name: Run unit tests - run: | - set -o pipefail; poetry run pytest -v --cache-clear --junitxml=pytest.xml --cov-report=term-missing --cov=datadoc | tee pytest-coverage.txt - - name: Pytest coverage comment - id: coverageComment - # Always run - if: ${{ success() || failure() }} - uses: MishaKav/pytest-coverage-comment@main - with: - github-token: ${{github.token}} - pytest-coverage-path: ./pytest-coverage.txt - junitxml-path: ./pytest.xml - - name: Generate README Badge - # Always run - if: ${{ success() || failure() }} - uses: schneegans/dynamic-badges-action@v1.4.0 - with: - auth: ${{ secrets.COVERAGE_BADGE_GIST }} - gistID: 0c0c5bdfc360b59254f2c32d65914025 - filename: pytest-coverage-badge-datadoc.json - label: Coverage - message: ${{ steps.coverageComment.outputs.coverage }} - color: ${{ steps.coverageComment.outputs.color }} - namedLogo: python diff --git a/.github/workflows/update-dependencies.sh b/.github/workflows/update-dependencies.sh deleted file mode 100755 index f84046bb..00000000 --- a/.github/workflows/update-dependencies.sh +++ /dev/null @@ -1,113 +0,0 @@ -#!/bin/bash - -# fail as soon as any command errors -set -euxo pipefail - 
-token=$1 -update_command=$2 -default_branch_name=$3 -update_path=$4 -on_changes_command=$5 -repo=$GITHUB_REPOSITORY #owner and repository: ie: user/repo -username=$GITHUB_ACTOR - -branch_name="automated-dependencies-update" -email="noreply@github.com" -remote_name="authenticated" -pr_title=":arrow_up: Update dependencies" - -if [ -z "$token" ]; then - echo "token is not defined" - exit 1 -fi - -if [ -z "$update_command" ]; then - echo "update-command cannot be empty" - exit 1 -fi - -# remove optional params markers -update_path_value=${update_path%?} -if [ -n "$update_path_value" ]; then - # if path is set, use that. otherwise default to current working directory - echo "Change directory to $update_path_value" - cd "$update_path_value" -fi - -# assumes the repo is already cloned as a prerequisite for running the script - -# fetch first to be able to detect if branch already exists -git fetch - - -# branch already exists, previous opened PR was not merged -if [ -z "$(git branch --list $branch_name)" ]; then - # create new branch - git checkout -b $branch_name -else - echo "Branch name $branch_name already exists" - - # check out existing branch - echo "Check out branch instead" - git checkout $branch_name - git pull -fi - -echo "Running update command $update_command" -eval "$update_command" - -if git --no-pager diff | grep diff; then - echo "Updates detected" - - if [[ -z ${NO_GIT_CONFIG_CHANGE+x} ]]; then - # configure git authorship - git config --global user.email $email - git config --global user.name "$username" - else - echo "NO_GIT_CONFIG_CHANGE was set, not modifying .gitconfig" - fi - - # Only add the remote if it doesn't already exist - if [[ $(git remote) == *$remote_name* ]]; then - echo "Remote already exists, skipping adding it." - else - echo "Adding remote $remote_name" - # format: https://[username]:[token]@github.com/[organization]/[repo].git - git remote add "$remote_name" "https://$username:$token@github.com/$repo.git" - fi - - # execute command to run when changes are deteced, if provided - on_changes_command_value=${on_changes_command%?} - echo "$on_changes_command_value" - if [ -n "$on_changes_command_value" ]; then - echo "Run post-update command" - eval "$on_changes_command_value" - fi - - # explicitly add all files including untracked - git add -A - - # commit the changes - git commit -a -m "$pr_title" --signoff - - # push the changes - git push "$remote_name" -f -u - - echo "https://api.github.com/repos/$repo/pulls" - - # create the PR - # if PR already exists, then update - response=$(curl --write-out "%{message}\n" -X POST -H "Content-Type: application/json" -H "Authorization: token $token" \ - --data '{"title":"'"$pr_title"'","head": "'"$branch_name"'","base":"'"$default_branch_name"'", "body":"Auto-generated pull request. \nThis pull request is generated by GitHub action based on the provided update commands."}' \ - "https://api.github.com/repos/$repo/pulls") - - echo "$response" - - if [[ "$response" == *"already exist"* ]]; then - echo "Pull request already opened. 
Updates were pushed to the existing PR instead"
-    exit 0
-  fi
-else
-  echo "No dependencies updates were detected"
-  exit 0
-fi
diff --git a/.gitignore b/.gitignore
index a67ec736..ee022a76 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,12 +1,3 @@
-# The file created from the example parquet file
-klargjorte_data/person_data_v1__DOC.json
-
-# Jetbrains IDE config
-.idea/
-
-# VSCode config
-.vscode/
-
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -136,3 +127,12 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# PyCharm
+.idea/
+
+/.python-version
+/.pytype/
+/docs/_build/
+
+**/.DS_Store
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7de3b104..6e8b9afd 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,30 +1,29 @@
+default_language_version:
+  python: python3.11
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
-      - id: check-ast
-      - id: check-added-large-files
       - id: check-merge-conflict
       - id: check-case-conflict
-      - id: check-docstring-first
-      - id: check-json
-      - id: check-yaml
-      - id: debug-statements
+      - id: mixed-line-ending
       - id: end-of-file-fixer
       - id: trailing-whitespace
-      - id: mixed-line-ending
-  - repo: https://github.com/psf/black
-    rev: 23.7.0
+      - id: check-added-large-files
+      - id: check-yaml
+      - id: check-json
+      - id: check-toml
+      - id: check-ast
+
+  - repo: https://github.com/ambv/black
+    rev: '23.12.1'
     hooks:
       - id: black
-        # It is recommended to specify the latest version of Python
-        # supported by your project here, or alternatively use
-        # pre-commit's default_language_version, see
-        # https://pre-commit.com/#top_level-default_language_version
-        language_version: python3.11
+        types: [python]
+
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    # Ruff version.
-    rev: v0.0.283
+    rev: v0.1.11
     hooks:
       - id: ruff
-        args: [ --fix, --exit-non-zero-on-fix ]
+        types: [python]
+        args: [--fix]
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 00000000..ea9d361b
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1,4 @@
+# Ignore these files since they are reindented with json.dump() and that does not
+# match the prettier format.
+.cookiecutter.json +.cruft.json diff --git a/.vscode/settings.json b/.vscode/settings.json index 5e625045..a3a18383 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,16 +1,7 @@ { - "python.linting.flake8Enabled": true, - "python.linting.flake8Args": [ - "--max-line-length=88", - "--ignore=E402,F841,F401,E302,E305,W503,E501" - ], - "python.linting.enabled": false, - "python.linting.pylintEnabled": false, - "python.linting.mypyEnabled": false, - "python.languageServer": "Pylance", - "python.testing.pytestArgs": [], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, - "python.formatting.provider": "black", - "editor.formatOnSave": true + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true } diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..b4630db6 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,132 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +- Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +- The use of sexualized language or imagery, and sexual attention or advances of + any kind +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email address, + without their explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +[mmw@ssb.no](mailto:mmw@ssb.no). +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][mozilla coc]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][faq]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[mozilla coc]: https://github.com/mozilla/diversity +[faq]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..a7f4f3b9 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,139 @@ +# Contributor Guide + +Thank you for your interest in improving this project. +This project is open-source under the [MIT license] and +welcomes contributions in the form of bug reports, feature requests, and pull requests. 
+ +Here is a list of important resources for contributors: + +- [Source Code] +- [Documentation] +- [Issue Tracker] +- [Code of Conduct] + +[mit license]: https://opensource.org/licenses/MIT +[source code]: https://github.com/statisticsnorway/datadoc +[documentation]: https://statisticsnorway.github.io/datadoc +[issue tracker]: https://github.com/statisticsnorway/datadoc/issues + +## How to report a bug + +Report bugs on the [Issue Tracker]. + +When filing an issue, make sure to answer these questions: + +- Which operating system and Python version are you using? +- Which version of this project are you using? +- What did you do? +- What did you expect to see? +- What did you see instead? + +The best way to get your bug fixed is to provide a test case, +and/or steps to reproduce the issue. + +## How to request a feature + +Request features on the [Issue Tracker]. + +## How to set up your development environment + +You need Python 3.10+ and the following tools: + +- [Poetry] +- [Nox] +- [nox-poetry] + +Install the package with development requirements: + +```console +poetry install +``` + +You can now run an interactive Python session, +or the command-line interface: + +```console +poetry run datadoc +``` + +[poetry]: https://python-poetry.org/ +[nox]: https://nox.thea.codes/ +[nox-poetry]: https://nox-poetry.readthedocs.io/ + +## How to test the project + +Run the full test suite: + +```console +nox +``` + +List the available Nox sessions: + +```console +nox --list-sessions +``` + +You can also run a specific Nox session. +For example, invoke the unit test suite like this: + +```console +nox --session=tests +``` + +Unit tests are located in the _tests_ directory, +and are written using the [pytest] testing framework. + +[pytest]: https://pytest.readthedocs.io/ + +## Running the Dockerized Application Locally + +```bash +docker run -p 8050:8050 \ +-v $HOME/.config/gcloud/application_default_credentials.json/:/application_default_credentials.json \ +-e GOOGLE_APPLICATION_CREDENTIALS="/application_default_credentials.json" \ +datadoc +``` + +### Release process + +Run the relevant version command on a branch e.g. + +```shell +poetry version patch +``` + +```shell +poetry version minor +``` + +Commit with message like `Bump version x.x.x -> y.y.y`. + +Open and merge a PR. + +## How to submit changes + +Open a [pull request] to submit changes to this project. + +Your pull request needs to meet the following guidelines for acceptance: + +- The Nox test suite must pass without errors and warnings. +- Include unit tests. This project maintains 100% code coverage. +- If your changes add functionality, update the documentation accordingly. + +Feel free to submit early, though—we can always iterate on this. + +To run linting and code formatting checks before committing your change, you can install pre-commit as a Git hook by running the following command: + +```console +nox --session=pre-commit -- install +``` + +It is recommended to open an issue before starting work on anything. +This will allow a chance to talk it over with the owners and validate your approach. 
+ +[pull request]: https://github.com/statisticsnorway/datadoc/pulls + + + +[code of conduct]: CODE_OF_CONDUCT.md diff --git a/Dockerfile b/Dockerfile index 96bc5dfe..ad049d70 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,7 +29,7 @@ ENV PATH="$POETRY_HOME/bin:$PATH" # Import our project files WORKDIR $APP_PATH COPY ./poetry.lock ./pyproject.toml ./README.md ./ -COPY ./$PACKAGE_NAME ./$PACKAGE_NAME +COPY ./src/$PACKAGE_NAME ./src/$PACKAGE_NAME RUN poetry build --format wheel RUN poetry export --format constraints.txt --output constraints.txt --without-hashes @@ -60,7 +60,7 @@ WORKDIR $APP_PATH COPY --from=build $APP_PATH/dist/*.whl ./ COPY --from=build $APP_PATH/constraints.txt ./ RUN pip install ./$APP_NAME*.whl --constraint constraints.txt -COPY ./$PACKAGE_NAME/gunicorn.conf.py ./ +COPY ./gunicorn.conf.py ./ # export environment variables for the CMD ENV PACKAGE_NAME=$PACKAGE_NAME diff --git a/LICENSE b/LICENSE index ade641ae..bcbb5237 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 Statistics Norway +Copyright © 2022 Statistics Norway Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 0b6f58c7..f5f7ee44 100644 --- a/README.md +++ b/README.md @@ -1,105 +1,75 @@ # Datadoc -![Datadoc Unit tests](https://github.com/statisticsnorway/datadoc/actions/workflows/unit-tests.yml/badge.svg) ![Code coverage](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/mmwinther/0c0c5bdfc360b59254f2c32d65914025/raw/pytest-coverage-badge-datadoc.json) [![PyPI version](https://img.shields.io/pypi/v/ssb-datadoc)](https://pypi.org/project/ssb-datadoc/) ![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg) +[![PyPI](https://img.shields.io/pypi/v/datadoc.svg)][pypi status] +[![Status](https://img.shields.io/pypi/status/datadoc.svg)][pypi status] +[![Python Version](https://img.shields.io/pypi/pyversions/datadoc)][pypi status] +[![License](https://img.shields.io/pypi/l/datadoc)][license] -Document datasets in Statistics Norway +[![Documentation](https://github.com/statisticsnorway/datadoc/actions/workflows/docs.yml/badge.svg)][documentation] +[![Tests](https://github.com/statisticsnorway/datadoc/actions/workflows/tests.yml/badge.svg)][tests] +[![Coverage](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_datadoc&metric=coverage)][sonarcov] +[![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_datadoc&metric=alert_status)][sonarquality] -## Usage - -![DataDoc in use](./doc/change-language-example.gif) - -### From Jupyter - -1. Open or another Jupyter Lab environment -1. Datadoc comes preinstalled in Statistics Norway environments. Elsewhere, run Run `pip install ssb-datadoc` to install -1. Upload a dataset to your Jupyter server (e.g. ) -1. Run the [demo.ipynb](./demo.ipynb) Notebook -1. Datadoc will open in the notebook - -## Contributing - -### Local environment - -Poetry is used for dependency management. [Poe the Poet](https://github.com/nat-n/poethepoet) is used for running poe tasks within poetry's virtualenv. Upon cloning this project first install necessary dependencies, then run the tests to verify everything is working. - -#### 1. Prerequisites - -- Python >=3.10 -- Poetry, install via `curl -sSL https://install.python-poetry.org | python3 -` - -#### 2. Install dependencies - -```shell -poetry install -``` - -#### 3. 
Install pre-commit hooks +[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)][pre-commit] +[![Black](https://img.shields.io/badge/code%20style-black-000000.svg)][black] +[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) +[![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)][poetry] -```shell -poetry run pre-commit install -``` - -#### 4. Run tests - -```shell -poetry run poe test -``` +[pypi status]: https://pypi.org/project/ssb-datadoc/ +[documentation]: https://statisticsnorway.github.io/datadoc +[tests]: https://github.com/statisticsnorway/datadoc/actions?workflow=Tests -### Add dependencies - -#### Main - -```shell -poetry add -``` +[sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_datadoc +[sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_datadoc +[pre-commit]: https://github.com/pre-commit/pre-commit +[black]: https://github.com/psf/black +[poetry]: https://python-poetry.org/ -#### Dev +## Features -```shell -poetry add --group dev -``` +- Describe a dataset and its variables according to Statistics Norway's metadata model. +- Supports `parquet` and `sas7bdat` dataset files. +- Supports local file system and Google Cloud Storage buckets. -### Run project locally +## Installation -To run the project locally: +You can install _Datadoc_ via [pipx] from [PyPI]: -```shell -poetry run poe datadoc +```console +pipx install ssb-datadoc ``` -### Run project locally in Jupyter +## Usage -To run the project locally in Jupyter run: +Please see the [Reference Guide] for details. -```shell -poetry run poe jupyter -``` +## Contributing -A Jupyter instance should open in your browser. Open and run the cells in the `.ipynb` file to demo datadoc. +Contributions are very welcome. +To learn more, see the [Contributor Guide]. -## Running the Dockerized Application Locally +## License -```bash -docker run -p 8050:8050 \ --v $HOME/.config/gcloud/application_default_credentials.json/:/application_default_credentials.json \ --e GOOGLE_APPLICATION_CREDENTIALS="/application_default_credentials.json" \ -datadoc -``` +Distributed under the terms of the [MIT license][license], +_Datadoc_ is free and open source software. -### Release process +## Issues -Run the relevant version command on a branch e.g. +If you encounter any problems, +please [file an issue] along with a detailed description. -```shell -poetry version patch -``` +## Credits -```shell -poetry version minor -``` +This project was generated from [Statistics Norway]'s [SSB PyPI Template]. -Commit with message like `Bump version x.x.x -> y.y.y`. +[statistics norway]: https://www.ssb.no/en +[pypi]: https://pypi.org/ +[ssb pypi template]: https://github.com/statisticsnorway/ssb-pypitemplate +[file an issue]: https://github.com/statisticsnorway/datadoc/issues +[pipx]: https://pipx.pypa.io/latest/installation/ -Open and merge a PR. + -Use Github to tag and release. +[license]: https://github.com/statisticsnorway/datadoc/blob/main/LICENSE +[contributor guide]: https://github.com/statisticsnorway/datadoc/blob/main/CONTRIBUTING.md +[reference guide]: https://statisticsnorway.github.io/datadoc/reference.html diff --git a/SECURITY.md b/SECURITY.md index c0e649fd..44f31d2f 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -9,4 +9,5 @@ the security and privacy of all our users. 
## Reporting a Vulnerability If you believe you have found a security vulnerability in any of SSB's GitHub -repositories, please report it to us using the [Github Private vulnerability reporting tool](https://github.com/statisticsnorway/datadoc/security/advisories). +repositories, please report it to us using the +[Github Private vulnerability reporting tool](https://github.com/statisticsnorway/datadoc/security/advisories). diff --git a/datadoc/tests/resources/DataDoc_testdata.ipynb b/datadoc/tests/resources/DataDoc_testdata.ipynb deleted file mode 100644 index 134ff5f9..00000000 --- a/datadoc/tests/resources/DataDoc_testdata.ipynb +++ /dev/null @@ -1,88 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "53eee078-e8b9-4453-aaf4-8a78458ce867", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import pyarrow as pa\n", - "import pyarrow.parquet as pq\n", - "\n", - "# Create Parquet test datasets\n", - "person_data = pd.read_csv(\"person_data.csv\",\n", - " sep=';',\n", - " dtype={\n", - " 'pers_id': str,\n", - " 'tidspunkt': str,\n", - " 'sivilstand': str,\n", - " 'alm_inntekt': int,\n", - " 'sykepenger': int,\n", - " 'ber_bruttoformue': int,\n", - " 'fullf_utdanning': str,\n", - " 'hoveddiagnose': str\n", - " },\n", - " parse_dates=['tidspunkt']\n", - " )\n", - "print(person_data)\n", - "# print(person_data.info())\n", - "\n", - "person_data.to_parquet('person_data.parquet')\n", - "\n", - "#metadata = pq.read_metadata('person_data.parquet')\n", - "#print(metadata)\n", - "\n", - "#meta = pq.ParquetFile('person_data.parquet')\n", - "#print(meta.schema)\n", - "#for var in meta.schema:\n", - "# print(str(var.name) + \":\" + str(var.type) + \":\" + str(var.nullable))\n", - "\n", - "table = pq.read_table('person_data.parquet')\n", - "#print(table.schema)\n", - "#for var in table.schema:\n", - "# print(var)\n", - " \n", - "#print(\"Column names: {}\".format(table.column_names))\n", - "#print(\"Column types: {}\".format(table.types))\n", - "#print(\"Schema: {}\".format(table.schema))\n", - "\n", - "#print(table.schema.field(\"pers_id\").metadata)\n", - "for var in table.schema:\n", - " print(str(var.name) + \":\" + str(var.type) + \":\" + str(var.nullable))\n", - " #print(var.metadata)\n", - " #print(var.nullable)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "21d3e7cd-fd09-4348-884b-130d206cf002", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Pyspark (local)", - "language": "python", - "name": "pyspark_local" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/demo.ipynb b/demo.ipynb deleted file mode 100644 index 77e08749..00000000 --- a/demo.ipynb +++ /dev/null @@ -1,81 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "5ef8c5e7-6637-4bbb-b79b-f11063f62d1b", - "metadata": {}, - "outputs": [], - "source": [ - "import datadoc" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79d4190c", - "metadata": {}, - "outputs": [], - "source": [ - "# Dataset stored in a GCS bucket\n", - "datadoc.main(\"gs://ssb-staging-dapla-felles-data-delt/datadoc/klargjorte_data/person_data_v1.parquet\")" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "id": "03d11b90", - "metadata": {}, - "outputs": [], - "source": [ - "# Dataset stored in the local filesystem\n", - "datadoc.main(\"./klargjorte_data/befolkning/person_testdata_p2021-12-31_p2021-12-31_v1.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7b704012-bd79-4390-9902-f61b35329538", - "metadata": {}, - "outputs": [], - "source": [ - "# Open your dataset after the app has started\n", - "datadoc.main()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "63b05fab-16e4-41af-9992-09fbaaa4eb20", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "datadoc", - "language": "python", - "name": "datadoc" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - }, - "vscode": { - "interpreter": { - "hash": "62e18fbeb7172c8fb6228a101ab782cf4345e9d504f491793f8010a13357a464" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/doc/.DS_Store b/doc/.DS_Store deleted file mode 100644 index 5008ddfc..00000000 Binary files a/doc/.DS_Store and /dev/null differ diff --git a/doc/change-language-example.gif b/doc/change-language-example.gif deleted file mode 100644 index 64cbd97d..00000000 Binary files a/doc/change-language-example.gif and /dev/null differ diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d4bb2cbb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/codeofconduct.md b/docs/codeofconduct.md new file mode 100644 index 00000000..58fd373b --- /dev/null +++ b/docs/codeofconduct.md @@ -0,0 +1,3 @@ +```{include} ../CODE_OF_CONDUCT.md + +``` diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..6e7ad5c5 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,68 @@ +"""Sphinx configuration.""" +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +import os +import sys + + +sys.path.insert(0, os.path.abspath("../src")) + +# -- Project information ----------------------------------------------------- + +project = "Datadoc" +copyright = "2022, Statistics Norway" +author = "Miles Winther" + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx_autodoc_typehints", + "myst_parser", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = [] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "monokai" + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "furo" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = ["_static"] + + +# -- Other configuration --------------------------------------------------- + +# Show typehints as content of the function or method +autodoc_typehints = "description" + +# Do not prepend module names to object names +add_module_names = False diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 00000000..b9419640 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,7 @@ +```{include} ../CONTRIBUTING.md +--- +end-before: +--- +``` + +[code of conduct]: codeofconduct diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..2b83a013 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,22 @@ +```{include} ../README.md +--- +end-before: +--- +``` + +[license]: license +[contributor guide]: contributing +[command-line reference]: reference + +```{toctree} +--- +hidden: +maxdepth: 1 +--- + +reference +contributing +Code of Conduct +License +Changelog +``` diff --git a/docs/license.md b/docs/license.md new file mode 100644 index 00000000..218790f5 --- /dev/null +++ b/docs/license.md @@ -0,0 +1,7 @@ +# License + +```{literalinclude} ../LICENSE +--- +language: none +--- +``` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..954237b9 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/reference.md b/docs/reference.md new file mode 100644 index 00000000..a51602e6 --- /dev/null +++ b/docs/reference.md @@ -0,0 +1,225 @@ +# Reference + + + +```{eval-rst} +datadoc package +=============== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + datadoc.backend + datadoc.frontend + + +datadoc.app module +------------------ + +.. automodule:: datadoc.app + :members: + :undoc-members: + :show-inheritance: + +datadoc.enums module +-------------------- + +.. automodule:: datadoc.enums + :members: + :undoc-members: + :show-inheritance: + +datadoc.state module +-------------------- + +.. automodule:: datadoc.state + :members: + :undoc-members: + :show-inheritance: + +datadoc.utils module +-------------------- + +.. automodule:: datadoc.utils + :members: + :undoc-members: + :show-inheritance: + +datadoc.wsgi module +------------------- + +.. automodule:: datadoc.wsgi + :members: + :undoc-members: + :show-inheritance: + +datadoc.backend package +======================= + + +datadoc.backend.datadoc\_metadata module +---------------------------------------- + +.. automodule:: datadoc.backend.datadoc_metadata + :members: + :undoc-members: + :show-inheritance: + +datadoc.backend.dataset\_parser module +-------------------------------------- + +.. automodule:: datadoc.backend.dataset_parser + :members: + :undoc-members: + :show-inheritance: + +datadoc.backend.model\_backwards\_compatibility module +------------------------------------------------------ + +.. automodule:: datadoc.backend.model_backwards_compatibility + :members: + :undoc-members: + :show-inheritance: + +datadoc.backend.storage\_adapter module +--------------------------------------- + +.. automodule:: datadoc.backend.storage_adapter + :members: + :undoc-members: + :show-inheritance: + +datadoc.frontend package +======================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + datadoc.frontend.callbacks + datadoc.frontend.components + datadoc.frontend.fields + +datadoc.frontend.callbacks package +================================== + + +datadoc.frontend.callbacks.dataset module +----------------------------------------- + +.. automodule:: datadoc.frontend.callbacks.dataset + :members: + :undoc-members: + :show-inheritance: + +datadoc.frontend.callbacks.register\_callbacks module +----------------------------------------------------- + +.. automodule:: datadoc.frontend.callbacks.register_callbacks + :members: + :undoc-members: + :show-inheritance: + +datadoc.frontend.callbacks.utils module +--------------------------------------- + +.. automodule:: datadoc.frontend.callbacks.utils + :members: + :undoc-members: + :show-inheritance: + +datadoc.frontend.callbacks.variables module +------------------------------------------- + +.. automodule:: datadoc.frontend.callbacks.variables + :members: + :undoc-members: + :show-inheritance: + +datadoc.frontend.components package +=================================== + + +datadoc.frontend.components.alerts module +----------------------------------------- + +.. automodule:: datadoc.frontend.components.alerts + :members: + :undoc-members: + :show-inheritance: + +datadoc.frontend.components.builders module +------------------------------------------- + +.. 
automodule:: datadoc.frontend.components.builders + :members: + :undoc-members: + :show-inheritance: + +datadoc.frontend.components.control\_bars module +------------------------------------------------ + +.. automodule:: datadoc.frontend.components.control_bars + :members: + :undoc-members: + :show-inheritance: + +datadoc.frontend.components.dataset\_tab module +----------------------------------------------- + +.. automodule:: datadoc.frontend.components.dataset_tab + :members: + :undoc-members: + :show-inheritance: + +datadoc.frontend.components.variables\_tab module +------------------------------------------------- + +.. automodule:: datadoc.frontend.components.variables_tab + :members: + :undoc-members: + :show-inheritance: + +datadoc.frontend.fields package +=============================== + + +datadoc.frontend.fields.display\_base module +-------------------------------------------- + +.. automodule:: datadoc.frontend.fields.display_base + :members: + :undoc-members: + :show-inheritance: + +datadoc.frontend.fields.display\_dataset module +----------------------------------------------- + +.. automodule:: datadoc.frontend.fields.display_dataset + :members: + :undoc-members: + :show-inheritance: + +datadoc.frontend.fields.display\_variables module +------------------------------------------------- + +.. automodule:: datadoc.frontend.fields.display_variables + :members: + :undoc-members: + :show-inheritance: + +``` diff --git a/docs/templates/package.rst_t b/docs/templates/package.rst_t new file mode 100644 index 00000000..89ac240d --- /dev/null +++ b/docs/templates/package.rst_t @@ -0,0 +1,48 @@ +{%- macro automodule(modname, options) -%} +.. automodule:: {{ modname }} +{%- for option in options %} + :{{ option }}: +{%- endfor %} +{%- endmacro %} + +{%- macro toctree(docnames) -%} +.. toctree:: + :maxdepth: {{ maxdepth }} +{% for docname in docnames %} + {{ docname }} +{%- endfor %} +{%- endmacro %} + +{%- if is_namespace %} +{{- [pkgname, "namespace"] | join(" ") | e | heading }} +{% else %} +{{- [pkgname, "package"] | join(" ") | e | heading }} +{% endif %} + +{%- if is_namespace %} +.. py:module:: {{ pkgname }} +{% endif %} + +{%- if modulefirst and not is_namespace %} +{{ automodule(pkgname, automodule_options) }} +{% endif %} + +{%- if subpackages %} +Subpackages +----------- + +{{ toctree(subpackages) }} +{% endif %} + +{%- if submodules %} +{% if separatemodules %} +{{ toctree(submodules) }} +{% else %} +{%- for submodule in submodules %} +{% if show_headings %} +{{- [submodule, "module"] | join(" ") | e | heading(2) }} +{% endif %} +{{ automodule(submodule, automodule_options) }} +{% endfor %} +{%- endif %} +{%- endif %} diff --git a/datadoc/gunicorn.conf.py b/gunicorn.conf.py similarity index 60% rename from datadoc/gunicorn.conf.py rename to gunicorn.conf.py index 77961c08..8682a1b0 100644 --- a/datadoc/gunicorn.conf.py +++ b/gunicorn.conf.py @@ -1,4 +1,4 @@ -"""Configuraion for the Gunicorn server.""" +"""Configuration for the Gunicorn server.""" bind = "0.0.0.0:8050" workers = 1 diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 00000000..4e4fa524 --- /dev/null +++ b/noxfile.py @@ -0,0 +1,252 @@ +"""Nox sessions.""" +import os +import shlex +import shutil +import sys +from pathlib import Path +from textwrap import dedent + +import nox + + +try: + from nox_poetry import Session + from nox_poetry import session +except ImportError: + message = f"""\ + Nox failed to import the 'nox-poetry' package. 
+ + Please install it using the following command: + + {sys.executable} -m pip install nox-poetry""" + raise SystemExit(dedent(message)) from None + +package = "datadoc" +python_versions = ["3.10", "3.11", "3.12"] +nox.needs_version = ">= 2021.6.6" +nox.options.sessions = ( + "pre-commit", + "mypy", + "tests", + "typeguard", + "xdoctest", + "docs-build", +) + + +def activate_virtualenv_in_precommit_hooks(session: Session) -> None: + """Activate virtualenv in hooks installed by pre-commit. + + This function patches git hooks installed by pre-commit to activate the + session's virtual environment. This allows pre-commit to locate hooks in + that environment when invoked from git. + + Args: + session: The Session object. + """ + assert session.bin is not None # nosec + + # Only patch hooks containing a reference to this session's bindir. Support + # quoting rules for Python and bash, but strip the outermost quotes so we + # can detect paths within the bindir, like /python. + bindirs = [ + bindir[1:-1] if bindir[0] in "'\"" else bindir + for bindir in (repr(session.bin), shlex.quote(session.bin)) + ] + + virtualenv = session.env.get("VIRTUAL_ENV") + if virtualenv is None: + return + + headers = { + # pre-commit < 2.16.0 + "python": f"""\ + import os + os.environ["VIRTUAL_ENV"] = {virtualenv!r} + os.environ["PATH"] = os.pathsep.join(( + {session.bin!r}, + os.environ.get("PATH", ""), + )) + """, + # pre-commit >= 2.16.0 + "bash": f"""\ + VIRTUAL_ENV={shlex.quote(virtualenv)} + PATH={shlex.quote(session.bin)}"{os.pathsep}$PATH" + """, + # pre-commit >= 2.17.0 on Windows forces sh shebang + "/bin/sh": f"""\ + VIRTUAL_ENV={shlex.quote(virtualenv)} + PATH={shlex.quote(session.bin)}"{os.pathsep}$PATH" + """, + } + + hookdir = Path(".git") / "hooks" + if not hookdir.is_dir(): + return + + for hook in hookdir.iterdir(): + if hook.name.endswith(".sample") or not hook.is_file(): + continue + + if not hook.read_bytes().startswith(b"#!"): + continue + + text = hook.read_text() + + if not is_bindir_in_text(bindirs, text): + continue + + lines = text.splitlines() + hook.write_text(insert_header_in_hook(headers, lines)) + + +def is_bindir_in_text(bindirs: list[str], text: str) -> bool: + """Helper function to check if bindir is in text.""" + return any( + Path("A") == Path("a") and bindir.lower() in text.lower() or bindir in text + for bindir in bindirs + ) + + +def insert_header_in_hook(header: dict[str, str], lines: list[str]) -> str: + """Helper function to insert headers in hook's text.""" + for executable, header_text in header.items(): + if executable in lines[0].lower(): + lines.insert(1, dedent(header_text)) + return "\n".join(lines) + return "\n".join(lines) + + +@session(name="pre-commit", python=python_versions[1]) +def precommit(session: Session) -> None: + """Lint using pre-commit.""" + args = session.posargs or [ + "run", + "--all-files", + "--hook-stage=manual", + "--show-diff-on-failure", + ] + session.install( + "pre-commit", + "pre-commit-hooks", + "ruff", + "black", + ) + session.run("pre-commit", *args) + if args and args[0] == "install": + activate_virtualenv_in_precommit_hooks(session) + + +@session(python=python_versions) +def mypy(session: Session) -> None: + """Type-check using mypy.""" + args = session.posargs or ["src", "tests"] + session.install(".") + session.install( + "mypy", + "pytest", + "types-setuptools", + "pandas-stubs", + "types-Pygments", + "types-colorama", + ) + session.run("mypy", *args) + if not session.posargs: + session.run("mypy", 
f"--python-executable={sys.executable}", "noxfile.py") + + +@session(python=python_versions) +def tests(session: Session) -> None: + """Run the test suite.""" + session.install(".") + session.install("coverage[toml]", "pytest", "pygments", "pytest-mock") + try: + session.run( + "coverage", + "run", + "--parallel", + "-m", + "pytest", + "-o", + "pythonpath=", + *session.posargs, + ) + finally: + if session.interactive: + session.notify("coverage", posargs=[]) + + +@session(python=python_versions[1]) +def coverage(session: Session) -> None: + """Produce the coverage report.""" + args = session.posargs or ["report", "--skip-empty"] + + session.install("coverage[toml]") + + if not session.posargs and any(Path().glob(".coverage.*")): + session.run("coverage", "combine") + + session.run("coverage", *args) + + +@session(python=python_versions[1]) +def typeguard(session: Session) -> None: + """Runtime type checking using Typeguard.""" + session.install(".") + session.install("pytest", "typeguard", "pygments", "pytest_mock") + session.run("pytest", f"--typeguard-packages={package}", *session.posargs) + + +@session(python=python_versions) +def xdoctest(session: Session) -> None: + """Run examples with xdoctest.""" + if session.posargs: + args = [package, *session.posargs] + else: + args = [f"--modname={package}", "--command=all"] + if "FORCE_COLOR" in os.environ: + args.append("--colored=1") + + session.install(".") + session.install("xdoctest[colors]") + session.run("python", "-m", "xdoctest", *args) + + +@session(name="docs-build", python=python_versions[1]) +def docs_build(session: Session) -> None: + """Build the documentation.""" + args = session.posargs or ["docs", "docs/_build"] + if not session.posargs and "FORCE_COLOR" in os.environ: + args.insert(0, "--color") + + session.install(".") + session.install( + "sphinx", "sphinx-autodoc-typehints", "sphinx-click", "furo", "myst-parser" + ) + + build_dir = Path("docs", "_build") + if build_dir.exists(): + shutil.rmtree(build_dir) + + session.run("sphinx-build", *args) + + +@session(python=python_versions[1]) +def docs(session: Session) -> None: + """Build and serve the documentation with live reloading on file changes.""" + args = session.posargs or ["--open-browser", "docs", "docs/_build"] + session.install(".") + session.install( + "sphinx", + "sphinx-autobuild", + "sphinx-autodoc-typehints", + "sphinx-click", + "furo", + "myst-parser", + ) + + build_dir = Path("docs", "_build") + if build_dir.exists(): + shutil.rmtree(build_dir) + + session.run("sphinx-autobuild", *args) diff --git a/poetry.lock b/poetry.lock index 2501a3ba..c6873059 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
[[package]] name = "aiohttp" @@ -110,6 +110,17 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "alabaster" +version = "0.7.16" +description = "A light, configurable Sphinx theme" +optional = false +python-versions = ">=3.9" +files = [ + {file = "alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92"}, + {file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"}, +] + [[package]] name = "alembic" version = "1.13.1" @@ -214,6 +225,38 @@ tests = ["attrs[tests-no-zope]", "zope-interface"] tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +[[package]] +name = "babel" +version = "2.14.0" +description = "Internationalization utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "Babel-2.14.0-py3-none-any.whl", hash = "sha256:efb1a25b7118e67ce3a259bed20545c29cb68be8ad2c784c83689981b7a57287"}, + {file = "Babel-2.14.0.tar.gz", hash = "sha256:6919867db036398ba21eb5c7a0f6b28ab8cbc3ae7a73a44ebe34ae74a4e7d363"}, +] + +[package.extras] +dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] + +[[package]] +name = "beautifulsoup4" +version = "4.12.2" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "black" version = "23.12.1" @@ -781,6 +824,17 @@ files = [ {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, ] +[[package]] +name = "docutils" +version = "0.20.1" +description = "Docutils -- Python Documentation Utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6"}, + {file = "docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b"}, +] + [[package]] name = "exceptiongroup" version = "1.2.0" @@ -996,6 +1050,23 @@ smb = ["smbprotocol"] ssh = ["paramiko"] tqdm = ["tqdm"] +[[package]] +name = "furo" +version = "2023.9.10" +description = "A clean customisable Sphinx documentation theme." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "furo-2023.9.10-py3-none-any.whl", hash = "sha256:513092538537dc5c596691da06e3c370714ec99bc438680edc1debffb73e5bfc"}, + {file = "furo-2023.9.10.tar.gz", hash = "sha256:5707530a476d2a63b8cad83b4f961f3739a69f4b058bcf38a03a39fa537195b2"}, +] + +[package.dependencies] +beautifulsoup4 = "*" +pygments = ">=2.7" +sphinx = ">=6.0,<8.0" +sphinx-basic-ng = "*" + [[package]] name = "gcsfs" version = "2023.12.2.post1" @@ -1036,20 +1107,20 @@ smmap = ">=3.0.1,<6" [[package]] name = "gitpython" -version = "3.1.40" +version = "3.1.41" description = "GitPython is a Python library used to interact with Git repositories" optional = false python-versions = ">=3.7" files = [ - {file = "GitPython-3.1.40-py3-none-any.whl", hash = "sha256:cf14627d5a8049ffbf49915732e5eddbe8134c3bdb9d476e6182b676fc573f8a"}, - {file = "GitPython-3.1.40.tar.gz", hash = "sha256:22b126e9ffb671fdd0c129796343a02bf67bf2994b35449ffc9321aa755e18a4"}, + {file = "GitPython-3.1.41-py3-none-any.whl", hash = "sha256:c36b6634d069b3f719610175020a9aed919421c87552185b085e04fbbdb10b7c"}, + {file = "GitPython-3.1.41.tar.gz", hash = "sha256:ed66e624884f76df22c8e16066d567aaa5a37d5b5fa19db2c6df6f7156db9048"}, ] [package.dependencies] gitdb = ">=4.0.1,<5" [package.extras] -test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest", "pytest-cov", "pytest-instafail", "pytest-subtests", "pytest-sugar"] +test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "sumtypes"] [[package]] name = "google-api-core" @@ -1083,13 +1154,13 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-auth" -version = "2.26.1" +version = "2.26.2" description = "Google Authentication Library" optional = false python-versions = ">=3.7" files = [ - {file = "google-auth-2.26.1.tar.gz", hash = "sha256:54385acca5c0fbdda510cd8585ba6f3fcb06eeecf8a6ecca39d3ee148b092590"}, - {file = "google_auth-2.26.1-py2.py3-none-any.whl", hash = "sha256:2c8b55e3e564f298122a02ab7b97458ccfcc5617840beb5d0ac757ada92c9780"}, + {file = "google-auth-2.26.2.tar.gz", hash = "sha256:97327dbbf58cccb58fc5a1712bba403ae76668e64814eb30f7316f7e27126b81"}, + {file = "google_auth-2.26.2-py2.py3-none-any.whl", hash = "sha256:3f445c8ce9b61ed6459aad86d8ccdba4a9afed841b2d1451a11ef4db08957424"}, ] [package.dependencies] @@ -1517,6 +1588,17 @@ files = [ {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, ] +[[package]] +name = "imagesize" +version = "1.4.1" +description = "Getting image size from png/jpeg/jpeg2000/gif file" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, + {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, +] + [[package]] name = "importlib-metadata" version = "7.0.1" @@ -1615,13 +1697,13 @@ testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] [[package]] name = "jinja2" -version = "3.1.2" +version = "3.1.3" description = "A very fast and expressive template engine." 
optional = false python-versions = ">=3.7" files = [ - {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, - {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, + {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, + {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, ] [package.dependencies] @@ -1733,6 +1815,21 @@ traitlets = ">=4.3.2" [package.extras] test = ["beautifulsoup4[html5lib]", "coverage", "cryptography", "jsonschema", "jupyterlab (>=3)", "mock", "nbclassic", "playwright", "pytest (>=3.3)", "pytest-asyncio (>=0.17)", "pytest-cov", "requests-mock", "virtualenv"] +[[package]] +name = "livereload" +version = "2.6.3" +description = "Python LiveReload is an awesome tool for web developers" +optional = false +python-versions = "*" +files = [ + {file = "livereload-2.6.3-py2.py3-none-any.whl", hash = "sha256:ad4ac6f53b2d62bb6ce1a5e6e96f1f00976a32348afedcb4b6d68df2a1d346e4"}, + {file = "livereload-2.6.3.tar.gz", hash = "sha256:776f2f865e59fde56490a56bcc6773b6917366bce0c267c60ee8aaf1a0959869"}, +] + +[package.dependencies] +six = "*" +tornado = {version = "*", markers = "python_version > \"2.7\""} + [[package]] name = "lxml" version = "5.1.0" @@ -1841,6 +1938,30 @@ babel = ["Babel"] lingua = ["lingua"] testing = ["pytest"] +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" +optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "markupsafe" version = "2.1.3" @@ -1924,6 +2045,36 @@ files = [ [package.dependencies] traitlets = "*" +[[package]] +name = "mdit-py-plugins" +version = "0.4.0" +description = "Collection of plugins for markdown-it-py" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mdit_py_plugins-0.4.0-py3-none-any.whl", hash = "sha256:b51b3bb70691f57f974e257e367107857a93b36f322a9e6d44ca5bf28ec2def9"}, + {file = "mdit_py_plugins-0.4.0.tar.gz", hash = "sha256:d8ab27e9aed6c38aa716819fedfde15ca275715955f8a185a8e1cf90fb1d2c1b"}, +] + +[package.dependencies] +markdown-it-py = ">=1.0.0,<4.0.0" + +[package.extras] +code-style = ["pre-commit"] +rtd = ["myst-parser", "sphinx-book-theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = 
"sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + [[package]] name = "multidict" version = "6.0.4" @@ -2065,6 +2216,32 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "myst-parser" +version = "2.0.0" +description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser," +optional = false +python-versions = ">=3.8" +files = [ + {file = "myst_parser-2.0.0-py3-none-any.whl", hash = "sha256:7c36344ae39c8e740dad7fdabf5aa6fc4897a813083c6cc9990044eb93656b14"}, + {file = "myst_parser-2.0.0.tar.gz", hash = "sha256:ea929a67a6a0b1683cdbe19b8d2e724cd7643f8aa3e7bb18dd65beac3483bead"}, +] + +[package.dependencies] +docutils = ">=0.16,<0.21" +jinja2 = "*" +markdown-it-py = ">=3.0,<4.0" +mdit-py-plugins = ">=0.4,<1.0" +pyyaml = "*" +sphinx = ">=6,<8" + +[package.extras] +code-style = ["pre-commit (>=3.0,<4.0)"] +linkify = ["linkify-it-py (>=2.0,<3.0)"] +rtd = ["ipython", "pydata-sphinx-theme (==v0.13.0rc4)", "sphinx-autodoc2 (>=0.4.2,<0.5.0)", "sphinx-book-theme (==1.0.0rc2)", "sphinx-copybutton", "sphinx-design2", "sphinx-pyscript", "sphinx-tippy (>=0.3.1)", "sphinx-togglebutton", "sphinxext-opengraph (>=0.8.2,<0.9.0)", "sphinxext-rediraffe (>=0.2.7,<0.3.0)"] +testing = ["beautifulsoup4", "coverage[toml]", "pytest (>=7,<8)", "pytest-cov", "pytest-param-files (>=0.3.4,<0.4.0)", "pytest-regressions", "sphinx-pytest"] +testing-docutils = ["pygments", "pytest (>=7,<8)", "pytest-param-files (>=0.3.4,<0.4.0)"] + [[package]] name = "nbformat" version = "5.9.2" @@ -2276,6 +2453,21 @@ sql-other = ["SQLAlchemy (>=1.4.36)"] test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.8.0)"] +[[package]] +name = "pandas-stubs" +version = "2.1.4.231227" +description = "Type annotations for pandas" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas_stubs-2.1.4.231227-py3-none-any.whl", hash = "sha256:211fc23e6ae87073bdf41dbf362c4a4d85e1e3477cb078dbac3da6c7fdaefba8"}, + {file = "pandas_stubs-2.1.4.231227.tar.gz", hash = "sha256:3ea29ef001e9e44985f5ebde02d4413f94891ef6ec7e5056fb07d125be796c23"}, +] + +[package.dependencies] +numpy = {version = ">=1.26.0", markers = "python_version < \"3.13\""} +types-pytz = ">=2022.1.1" + [[package]] name = "parso" version = "0.8.3" @@ -2291,17 +2483,6 @@ files = [ qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] testing = ["docopt", "pytest (<6.0.0)"] -[[package]] -name = "pastel" -version = "0.2.1" -description = "Bring colors to your terminal." -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "pastel-0.2.1-py2.py3-none-any.whl", hash = "sha256:4349225fcdf6c2bb34d483e523475de5bb04a5c10ef711263452cb37d7dd4364"}, - {file = "pastel-0.2.1.tar.gz", hash = "sha256:e6581ac04e973cac858828c6202c1e1e81fee1dc7de7683f3e1ffe0bfd8a573d"}, -] - [[package]] name = "pathspec" version = "0.12.1" @@ -2372,24 +2553,6 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] -[[package]] -name = "poethepoet" -version = "0.24.4" -description = "A task runner that works well with poetry." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "poethepoet-0.24.4-py3-none-any.whl", hash = "sha256:fb4ea35d7f40fe2081ea917d2e4102e2310fda2cde78974050ca83896e229075"}, - {file = "poethepoet-0.24.4.tar.gz", hash = "sha256:ff4220843a87c888cbcb5312c8905214701d0af60ac7271795baa8369b428fef"}, -] - -[package.dependencies] -pastel = ">=0.2.1,<0.3.0" -tomli = ">=1.2.2" - -[package.extras] -poetry-plugin = ["poetry (>=1.0,<2.0)"] - [[package]] name = "pre-commit" version = "3.6.0" @@ -2455,22 +2618,22 @@ testing = ["google-api-core[grpc] (>=1.31.5)"] [[package]] name = "protobuf" -version = "4.25.1" +version = "4.25.2" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "protobuf-4.25.1-cp310-abi3-win32.whl", hash = "sha256:193f50a6ab78a970c9b4f148e7c750cfde64f59815e86f686c22e26b4fe01ce7"}, - {file = "protobuf-4.25.1-cp310-abi3-win_amd64.whl", hash = "sha256:3497c1af9f2526962f09329fd61a36566305e6c72da2590ae0d7d1322818843b"}, - {file = "protobuf-4.25.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:0bf384e75b92c42830c0a679b0cd4d6e2b36ae0cf3dbb1e1dfdda48a244f4bcd"}, - {file = "protobuf-4.25.1-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:0f881b589ff449bf0b931a711926e9ddaad3b35089cc039ce1af50b21a4ae8cb"}, - {file = "protobuf-4.25.1-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:ca37bf6a6d0046272c152eea90d2e4ef34593aaa32e8873fc14c16440f22d4b7"}, - {file = "protobuf-4.25.1-cp38-cp38-win32.whl", hash = "sha256:abc0525ae2689a8000837729eef7883b9391cd6aa7950249dcf5a4ede230d5dd"}, - {file = "protobuf-4.25.1-cp38-cp38-win_amd64.whl", hash = "sha256:1484f9e692091450e7edf418c939e15bfc8fc68856e36ce399aed6889dae8bb0"}, - {file = "protobuf-4.25.1-cp39-cp39-win32.whl", hash = "sha256:8bdbeaddaac52d15c6dce38c71b03038ef7772b977847eb6d374fc86636fa510"}, - {file = "protobuf-4.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:becc576b7e6b553d22cbdf418686ee4daa443d7217999125c045ad56322dda10"}, - {file = "protobuf-4.25.1-py3-none-any.whl", hash = "sha256:a19731d5e83ae4737bb2a089605e636077ac001d18781b3cf489b9546c7c80d6"}, - {file = "protobuf-4.25.1.tar.gz", hash = "sha256:57d65074b4f5baa4ab5da1605c02be90ac20c8b40fb137d6a8df9f416b0d0ce2"}, + {file = "protobuf-4.25.2-cp310-abi3-win32.whl", hash = "sha256:b50c949608682b12efb0b2717f53256f03636af5f60ac0c1d900df6213910fd6"}, + {file = "protobuf-4.25.2-cp310-abi3-win_amd64.whl", hash = "sha256:8f62574857ee1de9f770baf04dde4165e30b15ad97ba03ceac65f760ff018ac9"}, + {file = "protobuf-4.25.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:2db9f8fa64fbdcdc93767d3cf81e0f2aef176284071507e3ede160811502fd3d"}, + {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:10894a2885b7175d3984f2be8d9850712c57d5e7587a2410720af8be56cdaf62"}, + {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fc381d1dd0516343f1440019cedf08a7405f791cd49eef4ae1ea06520bc1c020"}, + {file = "protobuf-4.25.2-cp38-cp38-win32.whl", hash = "sha256:33a1aeef4b1927431d1be780e87b641e322b88d654203a9e9d93f218ee359e61"}, + {file = "protobuf-4.25.2-cp38-cp38-win_amd64.whl", hash = "sha256:47f3de503fe7c1245f6f03bea7e8d3ec11c6c4a2ea9ef910e3221c8a15516d62"}, + {file = "protobuf-4.25.2-cp39-cp39-win32.whl", hash = "sha256:5e5c933b4c30a988b52e0b7c02641760a5ba046edc5e43d3b94a74c9fc57c1b3"}, + {file = "protobuf-4.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:d66a769b8d687df9024f2985d5137a337f957a0916cf5464d1513eee96a63ff0"}, + {file = "protobuf-4.25.2-py3-none-any.whl", hash = 
"sha256:a8b7a98d4ce823303145bf3c1a8bdb0f2f4642a414b196f04ad9853ed0c8f830"}, + {file = "protobuf-4.25.2.tar.gz", hash = "sha256:fe599e175cb347efc8ee524bcd4b902d11f7262c0e569ececcb89995c15f0a5e"}, ] [[package]] @@ -3316,6 +3479,237 @@ files = [ {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, ] +[[package]] +name = "snowballstemmer" +version = "2.2.0" +description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." +optional = false +python-versions = "*" +files = [ + {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, + {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, +] + +[[package]] +name = "soupsieve" +version = "2.5" +description = "A modern CSS selector implementation for Beautiful Soup." +optional = false +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, + {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, +] + +[[package]] +name = "sphinx" +version = "7.2.6" +description = "Python documentation generator" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinx-7.2.6-py3-none-any.whl", hash = "sha256:1e09160a40b956dc623c910118fa636da93bd3ca0b9876a7b3df90f07d691560"}, + {file = "sphinx-7.2.6.tar.gz", hash = "sha256:9a5160e1ea90688d5963ba09a2dcd8bdd526620edbb65c328728f1b2228d5ab5"}, +] + +[package.dependencies] +alabaster = ">=0.7,<0.8" +babel = ">=2.9" +colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} +docutils = ">=0.18.1,<0.21" +imagesize = ">=1.3" +Jinja2 = ">=3.0" +packaging = ">=21.0" +Pygments = ">=2.14" +requests = ">=2.25.0" +snowballstemmer = ">=2.0" +sphinxcontrib-applehelp = "*" +sphinxcontrib-devhelp = "*" +sphinxcontrib-htmlhelp = ">=2.0.0" +sphinxcontrib-jsmath = "*" +sphinxcontrib-qthelp = "*" +sphinxcontrib-serializinghtml = ">=1.1.9" + +[package.extras] +docs = ["sphinxcontrib-websupport"] +lint = ["docutils-stubs", "flake8 (>=3.5.0)", "flake8-simplify", "isort", "mypy (>=0.990)", "ruff", "sphinx-lint", "types-requests"] +test = ["cython (>=3.0)", "filelock", "html5lib", "pytest (>=4.6)", "setuptools (>=67.0)"] + +[[package]] +name = "sphinx-autobuild" +version = "2021.3.14" +description = "Rebuild Sphinx documentation on changes, with live-reload in the browser." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "sphinx-autobuild-2021.3.14.tar.gz", hash = "sha256:de1ca3b66e271d2b5b5140c35034c89e47f263f2cd5db302c9217065f7443f05"}, + {file = "sphinx_autobuild-2021.3.14-py3-none-any.whl", hash = "sha256:8fe8cbfdb75db04475232f05187c776f46f6e9e04cacf1e49ce81bdac649ccac"}, +] + +[package.dependencies] +colorama = "*" +livereload = "*" +sphinx = "*" + +[package.extras] +test = ["pytest", "pytest-cov"] + +[[package]] +name = "sphinx-autodoc-typehints" +version = "1.25.2" +description = "Type hints (PEP 484) support for the Sphinx autodoc extension" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sphinx_autodoc_typehints-1.25.2-py3-none-any.whl", hash = "sha256:5ed05017d23ad4b937eab3bee9fae9ab0dd63f0b42aa360031f1fad47e47f673"}, + {file = "sphinx_autodoc_typehints-1.25.2.tar.gz", hash = "sha256:3cabc2537e17989b2f92e64a399425c4c8bf561ed73f087bc7414a5003616a50"}, +] + +[package.dependencies] +sphinx = ">=7.1.2" + +[package.extras] +docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)"] +numpy = ["nptyping (>=2.5)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "sphobjinv (>=2.3.1)", "typing-extensions (>=4.7.1)"] + +[[package]] +name = "sphinx-basic-ng" +version = "1.0.0b2" +description = "A modern skeleton for Sphinx themes." +optional = false +python-versions = ">=3.7" +files = [ + {file = "sphinx_basic_ng-1.0.0b2-py3-none-any.whl", hash = "sha256:eb09aedbabfb650607e9b4b68c9d240b90b1e1be221d6ad71d61c52e29f7932b"}, + {file = "sphinx_basic_ng-1.0.0b2.tar.gz", hash = "sha256:9ec55a47c90c8c002b5960c57492ec3021f5193cb26cebc2dc4ea226848651c9"}, +] + +[package.dependencies] +sphinx = ">=4.0" + +[package.extras] +docs = ["furo", "ipython", "myst-parser", "sphinx-copybutton", "sphinx-inline-tabs"] + +[[package]] +name = "sphinx-click" +version = "5.1.0" +description = "Sphinx extension that automatically documents click applications" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sphinx-click-5.1.0.tar.gz", hash = "sha256:6812c2db62d3fae71a4addbe5a8a0a16c97eb491f3cd63fe34b4ed7e07236f33"}, + {file = "sphinx_click-5.1.0-py3-none-any.whl", hash = "sha256:ae97557a4e9ec646045089326c3b90e026c58a45e083b8f35f17d5d6558d08a0"}, +] + +[package.dependencies] +click = ">=7.0" +docutils = "*" +sphinx = ">=2.0" + +[[package]] +name = "sphinxcontrib-applehelp" +version = "1.0.7" +description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_applehelp-1.0.7-py3-none-any.whl", hash = "sha256:094c4d56209d1734e7d252f6e0b3ccc090bd52ee56807a5d9315b19c122ab15d"}, + {file = "sphinxcontrib_applehelp-1.0.7.tar.gz", hash = "sha256:39fdc8d762d33b01a7d8f026a3b7d71563ea3b72787d5f00ad8465bd9d6dfbfa"}, +] + +[package.dependencies] +Sphinx = ">=5" + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "1.0.5" +description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp documents" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_devhelp-1.0.5-py3-none-any.whl", hash = "sha256:fe8009aed765188f08fcaadbb3ea0d90ce8ae2d76710b7e29ea7d047177dae2f"}, + {file = "sphinxcontrib_devhelp-1.0.5.tar.gz", hash = "sha256:63b41e0d38207ca40ebbeabcf4d8e51f76c03e78cd61abe118cf4435c73d4212"}, +] + +[package.dependencies] +Sphinx = ">=5" + +[package.extras] 
+lint = ["docutils-stubs", "flake8", "mypy"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.0.4" +description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_htmlhelp-2.0.4-py3-none-any.whl", hash = "sha256:8001661c077a73c29beaf4a79968d0726103c5605e27db92b9ebed8bab1359e9"}, + {file = "sphinxcontrib_htmlhelp-2.0.4.tar.gz", hash = "sha256:6c26a118a05b76000738429b724a0568dbde5b72391a688577da08f11891092a"}, +] + +[package.dependencies] +Sphinx = ">=5" + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["html5lib", "pytest"] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +description = "A sphinx extension which renders display math in HTML via JavaScript" +optional = false +python-versions = ">=3.5" +files = [ + {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, + {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, +] + +[package.extras] +test = ["flake8", "mypy", "pytest"] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "1.0.6" +description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp documents" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_qthelp-1.0.6-py3-none-any.whl", hash = "sha256:bf76886ee7470b934e363da7a954ea2825650013d367728588732c7350f49ea4"}, + {file = "sphinxcontrib_qthelp-1.0.6.tar.gz", hash = "sha256:62b9d1a186ab7f5ee3356d906f648cacb7a6bdb94d201ee7adf26db55092982d"}, +] + +[package.dependencies] +Sphinx = ">=5" + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["pytest"] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "1.1.9" +description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "sphinxcontrib_serializinghtml-1.1.9-py3-none-any.whl", hash = "sha256:9b36e503703ff04f20e9675771df105e58aa029cfcbc23b8ed716019b7416ae1"}, + {file = "sphinxcontrib_serializinghtml-1.1.9.tar.gz", hash = "sha256:0c64ff898339e1fac29abd2bf5f11078f3ec413cfe9c046d3120d7ca65530b54"}, +] + +[package.dependencies] +Sphinx = ">=5" + +[package.extras] +lint = ["docutils-stubs", "flake8", "mypy"] +test = ["pytest"] + [[package]] name = "sqlalchemy" version = "2.0.25" @@ -3496,6 +3890,83 @@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<7.5)", "pytest-mock", "pytest-mypy-testing"] +[[package]] +name = "typeguard" +version = "4.1.5" +description = "Run-time type checker for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typeguard-4.1.5-py3-none-any.whl", hash = "sha256:8923e55f8873caec136c892c3bed1f676eae7be57cdb94819281b3d3bc9c0953"}, + {file = "typeguard-4.1.5.tar.gz", hash = "sha256:ea0a113bbc111bcffc90789ebb215625c963411f7096a7e9062d4e4630c155fd"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.7.0", markers = "python_version < \"3.12\""} + +[package.extras] +doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)"] +test = ["coverage[toml] (>=7)", "mypy (>=1.2.0)", "pytest (>=7)"] + +[[package]] +name = "types-colorama" +version = "0.4.15.20240106" +description = 
"Typing stubs for colorama" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-colorama-0.4.15.20240106.tar.gz", hash = "sha256:49096b4c4cbfcaa11699a0470c36e4f5631f193fb980188e013ea64445d35656"}, + {file = "types_colorama-0.4.15.20240106-py3-none-any.whl", hash = "sha256:18294bc18f60dc0b4895de8119964a5d895f5e180c2d1308fdd33009c0fa0f38"}, +] + +[[package]] +name = "types-docutils" +version = "0.20.0.20240106" +description = "Typing stubs for docutils" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-docutils-0.20.0.20240106.tar.gz", hash = "sha256:03992ec976fbe080db588e1e56a83c5e4aba5c733022b25bb26cb84397b96049"}, + {file = "types_docutils-0.20.0.20240106-py3-none-any.whl", hash = "sha256:d408f9305761905b157ea3cb80f53d4026bce7d4cc47312939118715467d0278"}, +] + +[[package]] +name = "types-pygments" +version = "2.17.0.20240106" +description = "Typing stubs for Pygments" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-Pygments-2.17.0.20240106.tar.gz", hash = "sha256:92e62ac37793e567cd2b0f64f1456c24fccce4041d9c5f869697a6739fde4fce"}, + {file = "types_Pygments-2.17.0.20240106-py3-none-any.whl", hash = "sha256:8052c574b0ab8f2dc94bdc4a31b9d48e8aa5a0f12398ef40cadadbe551da949b"}, +] + +[package.dependencies] +types-docutils = "*" +types-setuptools = "*" + +[[package]] +name = "types-pytz" +version = "2023.3.1.1" +description = "Typing stubs for pytz" +optional = false +python-versions = "*" +files = [ + {file = "types-pytz-2023.3.1.1.tar.gz", hash = "sha256:cc23d0192cd49c8f6bba44ee0c81e4586a8f30204970fc0894d209a6b08dab9a"}, + {file = "types_pytz-2023.3.1.1-py3-none-any.whl", hash = "sha256:1999a123a3dc0e39a2ef6d19f3f8584211de9e6a77fe7a0259f04a524e90a5cf"}, +] + +[[package]] +name = "types-setuptools" +version = "69.0.0.20240106" +description = "Typing stubs for setuptools" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-setuptools-69.0.0.20240106.tar.gz", hash = "sha256:e077f9089578df3c9938f6e4aa1633f182ba6740a6fdb1333f162bae5dfcbadc"}, + {file = "types_setuptools-69.0.0.20240106-py3-none-any.whl", hash = "sha256:b1da8981425723a674fd459c43dfa4402abeaee3f9cf682723ee9cf226125cc3"}, +] + [[package]] name = "typing-extensions" version = "4.9.0" @@ -3582,6 +4053,33 @@ MarkupSafe = ">=2.1.1" [package.extras] watchdog = ["watchdog (>=2.3)"] +[[package]] +name = "xdoctest" +version = "1.1.2" +description = "A rewrite of the builtin doctest module" +optional = false +python-versions = ">=3.6" +files = [ + {file = "xdoctest-1.1.2-py3-none-any.whl", hash = "sha256:ebe133222534f09597cbe461f97cc5f95ad7b36e5d31f3437caffb9baaddbddb"}, + {file = "xdoctest-1.1.2.tar.gz", hash = "sha256:267d3d4e362547fa917d3deabaf6888232bbf43c8d30298faeb957dbfa7e0ba3"}, +] + +[package.dependencies] +colorama = {version = "*", optional = true, markers = "platform_system == \"Windows\" and extra == \"colors\""} +Pygments = {version = "*", optional = true, markers = "python_version >= \"3.5.0\" and extra == \"colors\""} + +[package.extras] +all = ["IPython (>=7.10.0)", "IPython (>=7.23.1)", "Pygments (>=2.0.0)", "Pygments (>=2.4.1)", "attrs (>=19.2.0)", "colorama (>=0.4.1)", "debugpy (>=1.0.0)", "debugpy (>=1.0.0)", "debugpy (>=1.0.0)", "debugpy (>=1.3.0)", "debugpy (>=1.6.0)", "ipykernel (>=5.2.0)", "ipykernel (>=6.0.0)", "ipykernel (>=6.11.0)", "ipython-genutils (>=0.2.0)", "jedi (>=0.16)", "jinja2 (>=3.0.0)", "jupyter-client (>=6.1.5)", "jupyter-client (>=7.0.0)", "jupyter-core (>=4.7.0)", "nbconvert (>=6.0.0)", "nbconvert (>=6.1.0)", 
"pyflakes (>=2.2.0)", "pytest (>=4.6.0)", "pytest (>=4.6.0)", "pytest (>=6.2.5)", "pytest-cov (>=3.0.0)", "tomli (>=0.2.0)", "typing (>=3.7.4)"] +all-strict = ["IPython (==7.10.0)", "IPython (==7.23.1)", "Pygments (==2.0.0)", "Pygments (==2.4.1)", "attrs (==19.2.0)", "colorama (==0.4.1)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.3.0)", "debugpy (==1.6.0)", "ipykernel (==5.2.0)", "ipykernel (==6.0.0)", "ipykernel (==6.11.0)", "ipython-genutils (==0.2.0)", "jedi (==0.16)", "jinja2 (==3.0.0)", "jupyter-client (==6.1.5)", "jupyter-client (==7.0.0)", "jupyter-core (==4.7.0)", "nbconvert (==6.0.0)", "nbconvert (==6.1.0)", "pyflakes (==2.2.0)", "pytest (==4.6.0)", "pytest (==4.6.0)", "pytest (==6.2.5)", "pytest-cov (==3.0.0)", "tomli (==0.2.0)", "typing (==3.7.4)"] +colors = ["Pygments", "Pygments", "colorama"] +jupyter = ["IPython", "IPython", "attrs", "debugpy", "debugpy", "debugpy", "debugpy", "debugpy", "ipykernel", "ipykernel", "ipykernel", "ipython-genutils", "jedi", "jinja2", "jupyter-client", "jupyter-client", "jupyter-core", "nbconvert", "nbconvert"] +optional = ["IPython (>=7.10.0)", "IPython (>=7.23.1)", "Pygments (>=2.0.0)", "Pygments (>=2.4.1)", "attrs (>=19.2.0)", "colorama (>=0.4.1)", "debugpy (>=1.0.0)", "debugpy (>=1.0.0)", "debugpy (>=1.0.0)", "debugpy (>=1.3.0)", "debugpy (>=1.6.0)", "ipykernel (>=5.2.0)", "ipykernel (>=6.0.0)", "ipykernel (>=6.11.0)", "ipython-genutils (>=0.2.0)", "jedi (>=0.16)", "jinja2 (>=3.0.0)", "jupyter-client (>=6.1.5)", "jupyter-client (>=7.0.0)", "jupyter-core (>=4.7.0)", "nbconvert (>=6.0.0)", "nbconvert (>=6.1.0)", "pyflakes (>=2.2.0)", "tomli (>=0.2.0)"] +optional-strict = ["IPython (==7.10.0)", "IPython (==7.23.1)", "Pygments (==2.0.0)", "Pygments (==2.4.1)", "attrs (==19.2.0)", "colorama (==0.4.1)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.3.0)", "debugpy (==1.6.0)", "ipykernel (==5.2.0)", "ipykernel (==6.0.0)", "ipykernel (==6.11.0)", "ipython-genutils (==0.2.0)", "jedi (==0.16)", "jinja2 (==3.0.0)", "jupyter-client (==6.1.5)", "jupyter-client (==7.0.0)", "jupyter-core (==4.7.0)", "nbconvert (==6.0.0)", "nbconvert (==6.1.0)", "pyflakes (==2.2.0)", "tomli (==0.2.0)"] +tests = ["pytest (>=4.6.0)", "pytest (>=4.6.0)", "pytest (>=6.2.5)", "pytest-cov (>=3.0.0)", "typing (>=3.7.4)"] +tests-binary = ["cmake", "cmake", "ninja", "ninja", "pybind11", "pybind11", "scikit-build", "scikit-build"] +tests-binary-strict = ["cmake (==3.21.2)", "cmake (==3.25.0)", "ninja (==1.10.2)", "ninja (==1.11.1)", "pybind11 (==2.10.3)", "pybind11 (==2.7.1)", "scikit-build (==0.11.1)", "scikit-build (==0.16.1)"] +tests-strict = ["pytest (==4.6.0)", "pytest (==4.6.0)", "pytest (==6.2.5)", "pytest-cov (==3.0.0)", "typing (==3.7.4)"] + [[package]] name = "yarl" version = "1.9.4" @@ -3703,4 +4201,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.10,<4.0" -content-hash = "2c4efd09852bd4b4d74640d9ae160d07797229f2d4886072dd48780bab249dc5" +content-hash = "dc2373a263a03fd19c1ded045c7e8e21281783a5bbb9d8e64b5e843b7e1c9333" diff --git a/pyproject.toml b/pyproject.toml index c42e3008..d7e8322d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,19 +5,23 @@ description = "Document dataset metadata. 
For use in Statistics Norway's metadat authors = ["Statistics Norway "] license = "MIT" readme = "README.md" +homepage = "https://github.com/statisticsnorway/datadoc" repository = "https://github.com/statisticsnorway/datadoc" - +documentation = "https://statisticsnorway.github.io/datadoc" classifiers = [ - "Development Status :: 3 - Alpha", + "Development Status :: 4 - Beta", "Framework :: Dash", "Typing :: Typed", ] -packages = [{ include = "datadoc" }] +packages = [{ include = "datadoc", from = "src" }] [tool.poetry.scripts] datadoc = 'datadoc.app:main' +[tool.poetry.urls] +Changelog = "https://github.com/statisticsnorway/datadoc/releases" + [tool.poetry.dependencies] python = ">=3.10,<4.0" pyarrow = ">=8.0.0" @@ -40,62 +44,134 @@ nbstripout = ">=0.5.0" python-kacl = "*" pre-commit = "*" pytest-mock = "*" -poethepoet = "*" ruff = ">=0.0.284" deptry = "^0.12.0" +pygments = ">=2.10.0" +coverage = { extras = ["toml"], version = ">=6.2" } +furo = ">=2021.11.12" +sphinx = ">=6.2.1" +sphinx-autobuild = ">=2021.3.14" +sphinx-autodoc-typehints = ">=1.24.0" +sphinx-click = ">=3.0.2" +typeguard = ">=2.13.3" +xdoctest = { extras = ["colors"], version = ">=0.15.10" } +myst-parser = { version = ">=0.16.1" } +pandas-stubs = "*" +types-Pygments = "*" +types-colorama = "*" +types-setuptools = "*" -[build-system] -requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" -[tool.poe.tasks] -test = "pytest -vvvv --cache-clear --cov-report=term-missing --cov=." -jupyter = "jupyter lab" -install-kernel = "python -m ipykernel install --user --name datadoc" -datadoc = "python datadoc/app.py" +[tool.pytest.ini_options] +pythonpath = ["src/datadoc"] -[tool.black] -target-version = ["py310", "py311"] -include = '\.pyi?$' +[tool.coverage.paths] +source = ["src", "*/site-packages"] +tests = ["tests", "*/tests"] [tool.coverage.run] +branch = true +source = ["datadoc"] omit = [ "datadoc/tests/*", "datadoc/__init__.py", "datadoc/frontend/callbacks/register.py", ] +[tool.coverage.report] +show_missing = true +fail_under = 80 + +[tool.mypy] +strict = false +warn_unreachable = true +pretty = true +show_column_numbers = true +show_error_context = true + +[[tool.mypy.overrides]] +# Allow missing type hints in third-party libraries without type information. +module = [ + "dash", + "dash_bootstrap_components", + "flask_healthz", + "dapla", + "gcsfs", + "pyarrow", + "pyarrow.parquet", + "dash.development.base_component", + "datadoc_model", + "datadoc_model.model", + "pytest_mock", +] +ignore_missing_imports = true + +# Disable specific error codes in the 'tests' package +# Also don't require type annotations +[[tool.mypy.overrides]] +module = ["tests.*"] +disable_error_code = [ + "var-annotated", + "has-type", + "no-any-return", + "no-untyped-def", +] + [tool.ruff] -exclude = [ - ".git", +force-exclude = true # Apply excludes to pre-commit +show-fixes = true +src = ["src", "tests"] +target-version = "py39" # Minimum Python version supported + +# Ruff rules may be customized as desired: https://docs.astral.sh/ruff/rules/ +select = ["ALL"] +ignore = [ + "A003", + "ANN101", # Suppress missing-type-self. + "ANN102", # Suppress missing-type-cls. + "B008", + "D100", # Suppress undocumented-public-module. Only doc of public api required. 
+ "E501", # Let black handle line length + "TRY003", +] +include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"] +extend-exclude = [ "__pycache__", "old", - "build", - "dist", - ".venv", - ".eggs", - ".tox", ".ipynb_checkpoints", + "noxfile.py", + "docs/conf.py", ] -ignore = ["A003", "B008", "E501"] -select = ["ALL"] + +[tool.ruff.isort] +force-single-line = true [tool.ruff.mccabe] -max-complexity = 10 +max-complexity = 15 + +[tool.ruff.pydocstyle] +convention = "google" # You can also use "numpy". [tool.ruff.pep8-naming] -classmethod-decorators = ["classmethod", "validator", "root_validator"] +classmethod-decorators = [ + "classmethod", + "validator", + "root_validator", + "pydantic.validator", +] [tool.ruff.per-file-ignores] "*/__init__.py" = ["F401"] -"datadoc/frontend/callbacks/register_callbacks.py" = ["C901"] -"datadoc/tests/*" = [ - # asserts are encouraged in pytest - "S101", - # return annotations don't add value for test functions - "ANN201", - # docstrings are overkill for test functions - "D103", +"src/datadoc/frontend/callbacks/register_callbacks.py" = ["C901"] +"tests/**" = [ + "S101", # asserts are encouraged in pytest + "ANN201", # return annotations don't add value for test functions + "D103", # docstrings are overkill for test functions + "D100", ] # This filename is a convention for Gunicorn -"datadoc/gunicorn.conf.py" = ["N999"] +"src/datadoc/gunicorn.conf.py" = ["N999"] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/sonar-project.properties b/sonar-project.properties new file mode 100644 index 00000000..55903673 --- /dev/null +++ b/sonar-project.properties @@ -0,0 +1,11 @@ +sonar.projectKey=statisticsnorway_datadoc +sonar.organization=statisticsnorway +sonar.host.url=https://sonarcloud.io + +sonar.sources=src/datadoc +sonar.tests=tests + +sonar.python.version=3.10 +sonar.python.coverage.reportPaths=coverage.xml + +sonar.sourceEncoding=UTF-8 diff --git a/datadoc/__init__.py b/src/datadoc/__init__.py similarity index 100% rename from datadoc/__init__.py rename to src/datadoc/__init__.py diff --git a/datadoc/app.py b/src/datadoc/app.py similarity index 83% rename from datadoc/app.py rename to src/datadoc/app.py index 4948a199..f23c44f0 100644 --- a/datadoc/app.py +++ b/src/datadoc/app.py @@ -16,22 +16,20 @@ from datadoc.backend.datadoc_metadata import DataDocMetadata from datadoc.enums import SupportedLanguages from datadoc.frontend.callbacks.register_callbacks import register_callbacks -from datadoc.frontend.components.alerts import ( - dataset_validation_error, - opened_dataset_error, - opened_dataset_success, - saved_metadata_success, - variables_validation_error, -) -from datadoc.frontend.components.control_bars import ( - build_controls_bar, - build_language_dropdown, - header, - progress_bar, -) +from datadoc.frontend.components.alerts import dataset_validation_error +from datadoc.frontend.components.alerts import opened_dataset_error +from datadoc.frontend.components.alerts import opened_dataset_success +from datadoc.frontend.components.alerts import saved_metadata_success +from datadoc.frontend.components.alerts import variables_validation_error +from datadoc.frontend.components.control_bars import build_controls_bar +from datadoc.frontend.components.control_bars import build_language_dropdown +from datadoc.frontend.components.control_bars import header +from datadoc.frontend.components.control_bars import progress_bar from datadoc.frontend.components.dataset_tab import build_dataset_tab from 
datadoc.frontend.components.variables_tab import build_variables_tab -from datadoc.utils import get_app_version, pick_random_port, running_in_notebook +from datadoc.utils import get_app_version +from datadoc.utils import pick_random_port +from datadoc.utils import running_in_notebook logger = logging.getLogger(__name__) diff --git a/datadoc/assets/bootstrap-icons.css b/src/datadoc/assets/bootstrap-icons.css similarity index 100% rename from datadoc/assets/bootstrap-icons.css rename to src/datadoc/assets/bootstrap-icons.css diff --git a/datadoc/assets/bootstrap.min.css b/src/datadoc/assets/bootstrap.min.css similarity index 100% rename from datadoc/assets/bootstrap.min.css rename to src/datadoc/assets/bootstrap.min.css diff --git a/datadoc/assets/bundle.css b/src/datadoc/assets/bundle.css similarity index 100% rename from datadoc/assets/bundle.css rename to src/datadoc/assets/bundle.css diff --git a/datadoc/assets/fonts/bootstrap-icons.woff b/src/datadoc/assets/fonts/bootstrap-icons.woff similarity index 100% rename from datadoc/assets/fonts/bootstrap-icons.woff rename to src/datadoc/assets/fonts/bootstrap-icons.woff diff --git a/datadoc/assets/fonts/bootstrap-icons.woff2 b/src/datadoc/assets/fonts/bootstrap-icons.woff2 similarity index 100% rename from datadoc/assets/fonts/bootstrap-icons.woff2 rename to src/datadoc/assets/fonts/bootstrap-icons.woff2 diff --git a/datadoc/backend/__init__.py b/src/datadoc/backend/__init__.py similarity index 100% rename from datadoc/backend/__init__.py rename to src/datadoc/backend/__init__.py diff --git a/datadoc/backend/datadoc_metadata.py b/src/datadoc/backend/datadoc_metadata.py similarity index 79% rename from datadoc/backend/datadoc_metadata.py rename to src/datadoc/backend/datadoc_metadata.py index 6d4b02a2..427f4ff3 100644 --- a/datadoc/backend/datadoc_metadata.py +++ b/src/datadoc/backend/datadoc_metadata.py @@ -14,9 +14,13 @@ from datadoc.backend.dataset_parser import DatasetParser from datadoc.backend.model_backwards_compatibility import upgrade_metadata from datadoc.backend.storage_adapter import StorageAdapter -from datadoc.enums import DatasetState, SupportedLanguages, VariableRole -from datadoc.frontend.fields import display_dataset, display_variables -from datadoc.utils import calculate_percentage, get_timestamp_now +from datadoc.enums import DatasetState +from datadoc.enums import SupportedLanguages +from datadoc.enums import VariableRole +from datadoc.frontend.fields import display_dataset +from datadoc.frontend.fields import display_variables +from datadoc.utils import calculate_percentage +from datadoc.utils import get_timestamp_now if TYPE_CHECKING: from datetime import datetime @@ -32,12 +36,12 @@ class DataDocMetadata: """Handle reading, updating and writing of metadata.""" def __init__( - self: t.Self @ DataDocMetadata, - dataset_path: str | os.PathLike | None = None, - metadata_document_path: str | os.PathLike | None = None, + self, + dataset_path: str | os.PathLike[str] | None = None, + metadata_document_path: str | os.PathLike[str] | None = None, ) -> None: """Read in a dataset if supplied, otherwise naively instantiate the class.""" - self.dataset: str = dataset_path + self.dataset: pathlib.Path | None = None self.metadata_document: StorageAdapter | None = None self.container: model.MetadataContainer | None = None @@ -56,20 +60,21 @@ def __init__( # In this case the user has specified an independent metadata document for editing # without a dataset. 
self.metadata_document = StorageAdapter.for_path(metadata_document_path) - self.extract_metadata_from_existing_document() + self.extract_metadata_from_existing_document(self.metadata_document) - elif self.dataset: + elif dataset_path: + self.dataset = pathlib.Path(dataset_path) # The short_name is set as the dataset filename without file extension - self.short_name: str = pathlib.Path( + self.short_name = pathlib.Path( self.dataset, ).stem - self.metadata_document: StorageAdapter = StorageAdapter.for_path( + self.metadata_document = StorageAdapter.for_path( StorageAdapter.for_path(self.dataset).parent(), ) self.metadata_document.joinpath( self.short_name + METADATA_DOCUMENT_FILE_SUFFIX, ) - self.dataset_state: DatasetState = self.get_dataset_state(self.dataset) + self.dataset_state = self.get_dataset_state(self.dataset) self.extract_metadata_from_files() @@ -83,13 +88,11 @@ def __init__( ) def get_dataset_state( - self: t.Self @ DataDocMetadata, - dataset: str, + self, + dataset: pathlib.Path, ) -> DatasetState | None: """Use the path to attempt to guess the state of the dataset.""" - if dataset is None: - return None - dataset_path_parts = set(pathlib.Path(dataset).parts) + dataset_path_parts = set(dataset.parts) for state in DatasetState: # We assume that files are saved in the Norwegian language as specified by SSB. norwegian_dataset_state_path_part = state.get_value_for_language( @@ -106,8 +109,8 @@ def get_dataset_state( return None + @staticmethod def get_dataset_version( - self: t.Self @ DataDocMetadata, dataset_stem: str, ) -> str | None: """Find version information if exists in filename. @@ -127,16 +130,16 @@ def get_dataset_version( return last_filename_element[1:] return None - def extract_metadata_from_files(self: t.Self @ DataDocMetadata) -> None: + def extract_metadata_from_files(self) -> None: """Read metadata from an existing metadata document. If no metadata document exists, create one from scratch by extracting metadata from the dataset file. """ - if self.metadata_document.exists(): - self.extract_metadata_from_existing_document() - else: - self.extract_metadata_from_dataset() + if self.metadata_document is not None and self.metadata_document.exists(): + self.extract_metadata_from_existing_document(self.metadata_document) + elif self.dataset is not None: + self.extract_metadata_from_dataset(self.dataset, self.short_name or "") self.meta.dataset.id = uuid.uuid4() @@ -153,15 +156,15 @@ def extract_metadata_from_files(self: t.Self @ DataDocMetadata) -> None: self.variables_lookup = {v.short_name: v for v in self.meta.variables} - def extract_metadata_from_existing_document(self: t.Self @ DataDocMetadata) -> None: + def extract_metadata_from_existing_document(self, document: StorageAdapter) -> None: """There's an existing metadata document, so read in the metadata from that.""" fresh_metadata = {} try: - with self.metadata_document.open(mode="r", encoding="utf-8") as file: + with document.open(mode="r", encoding="utf-8") as file: fresh_metadata = json.load(file) logger.info( "Opened existing metadata file %s", - self.metadata_document.location, + document.location, ) if self.is_metadata_in_container_structure(fresh_metadata): @@ -184,12 +187,12 @@ def extract_metadata_from_existing_document(self: t.Self @ DataDocMetadata) -> N logger.warning( "Could not open existing metadata file %s. 
\ Falling back to collecting data from the dataset", - self.metadata_document.location, + document.location, exc_info=True, ) def is_metadata_in_container_structure( - self: t.Self @ DataDocMetadata, + self, metadata: dict, ) -> bool: """At a certain point a metadata 'container' was introduced. @@ -199,25 +202,29 @@ def is_metadata_in_container_structure( """ return "datadoc" in metadata and "dataset" in metadata["datadoc"] - def extract_metadata_from_dataset(self: t.Self @ DataDocMetadata) -> None: + def extract_metadata_from_dataset( + self, + dataset: pathlib.Path, + short_name: str, + ) -> None: """Obtain what metadata we can from the dataset itself. This makes it easier for the user by 'pre-filling' certain fields. Certain elements are dependent on the dataset being saved according to SSB's standard. """ - self.ds_schema = DatasetParser.for_file(self.dataset) + self.ds_schema: DatasetParser = DatasetParser.for_file(dataset) self.meta.dataset = model.Dataset( short_name=self.short_name, dataset_state=self.dataset_state, - version=self.get_dataset_version(self.short_name), + version=self.get_dataset_version(short_name), data_source_path=self.dataset, created_by=self.current_user, ) self.meta.variables = self.ds_schema.get_fields() - def write_metadata_document(self: t.Self @ DataDocMetadata) -> None: + def write_metadata_document(self) -> None: """Write all currently known metadata to file.""" timestamp: datetime = get_timestamp_now() if self.meta.dataset.metadata_created_date is None: @@ -232,23 +239,28 @@ def write_metadata_document(self: t.Self @ DataDocMetadata) -> None: else: self.container = model.MetadataContainer(datadoc=self.meta) - self.metadata_document.write_text(self.container.model_dump_json(indent=4)) - logger.info("Saved metadata document %s", self.metadata_document.location) + if self.metadata_document: + self.metadata_document.write_text(self.container.model_dump_json(indent=4)) + logger.info("Saved metadata document %s", self.metadata_document.location) + else: + msg = "No metadata document to save" + raise ValueError(msg) @property - def percent_complete(self: t.Self @ DataDocMetadata) -> int: + def percent_complete(self) -> int: """The percentage of obligatory metadata completed. A metadata field is counted as complete when any non-None value is assigned. Used for a live progress bar in the UI, as well as being saved in the datadoc as a simple quality indicator. 
""" - num_all_fields = len(display_dataset.OBLIGATORY_DATASET_METADATA) + num_all_fields = len(display_dataset.OBLIGATORY_DATASET_METADATA_IDENTIFIERS) num_set_fields = len( [ k for k, v in self.meta.dataset.model_dump().items() - if k in display_dataset.OBLIGATORY_DATASET_METADATA and v is not None + if k in display_dataset.OBLIGATORY_DATASET_METADATA_IDENTIFIERS + and v is not None ], ) diff --git a/datadoc/backend/dataset_parser.py b/src/datadoc/backend/dataset_parser.py similarity index 84% rename from datadoc/backend/dataset_parser.py rename to src/datadoc/backend/dataset_parser.py index b87a6d77..9e321370 100644 --- a/datadoc/backend/dataset_parser.py +++ b/src/datadoc/backend/dataset_parser.py @@ -5,20 +5,22 @@ from __future__ import annotations -import pathlib import re import typing as t -from abc import ABC, abstractmethod +from abc import ABC +from abc import abstractmethod import pandas as pd import pyarrow.parquet as pq -from datadoc_model.model import LanguageStringType, Variable +from datadoc_model.model import LanguageStringType +from datadoc_model.model import Variable from datadoc import state from datadoc.backend.storage_adapter import StorageAdapter from datadoc.enums import DataType -TDatasetParser = t.TypeVar("TDatasetParser", bound="DatasetParser") +if t.TYPE_CHECKING: + import pathlib KNOWN_INTEGER_TYPES = ( "int", @@ -75,17 +77,19 @@ KNOWN_BOOLEAN_TYPES = ("bool", "bool_", "boolean") -TYPE_CORRESPONDENCE: list[tuple[list[str], DataType]] = [ +TYPE_CORRESPONDENCE: list[tuple[tuple[str, ...], DataType]] = [ (KNOWN_INTEGER_TYPES, DataType.INTEGER), (KNOWN_FLOAT_TYPES, DataType.FLOAT), (KNOWN_STRING_TYPES, DataType.STRING), (KNOWN_DATETIME_TYPES, DataType.DATETIME), (KNOWN_BOOLEAN_TYPES, DataType.BOOLEAN), ] -TYPE_MAP: dict[str:DataType] = {} +TYPE_MAP: dict[str, DataType] = {} for concrete_type, abstract_type in TYPE_CORRESPONDENCE: TYPE_MAP.update({c: abstract_type for c in concrete_type}) +TDatasetParser = t.TypeVar("TDatasetParser", bound="DatasetParser") + class DatasetParser(ABC): """Abstract Base Class for all Dataset parsers. @@ -98,24 +102,27 @@ class DatasetParser(ABC): - A method to extract variables (columns) from the dataset, so they may be documented. 
""" - def __init__(self: t.Self @ DatasetParser, dataset: str) -> None: + def __init__(self, dataset: pathlib.Path) -> None: """Initialize for a given dataset.""" self.dataset: StorageAdapter = StorageAdapter.for_path(dataset) @staticmethod - def for_file(dataset: str) -> TDatasetParser: + def for_file(dataset: pathlib.Path) -> DatasetParser: """Return the correct subclass based on the given dataset file.""" - supported_file_types = { - "parquet": DatasetParserParquet, - "sas7bdat": DatasetParserSas7Bdat, - "parquet.gzip": DatasetParserParquet, + supported_file_types: dict[ + str, + type[DatasetParser], + ] = { + ".parquet": DatasetParserParquet, + ".sas7bdat": DatasetParserSas7Bdat, + ".parquet.gzip": DatasetParserParquet, } file_type = "Unknown" try: - file_type = str(pathlib.Path(dataset)).lower().split(".")[-1] + file_type = dataset.suffix # Gzipped parquet files can be read with DatasetParserParquet - match = re.search(r"(.parquet.gzip)", str(pathlib.Path(dataset)).lower()) - file_type = "parquet.gzip" if match else file_type + match = re.search(r"(.parquet.gzip)", str(dataset).lower()) + file_type = ".parquet.gzip" if match else file_type # Extract the appropriate reader class from the SUPPORTED_FILE_TYPES dict and return an instance of it reader = supported_file_types[file_type](dataset) except IndexError as e: @@ -148,18 +155,18 @@ def transform_data_type(data_type: str) -> DataType | None: return TYPE_MAP.get(data_type.lower(), None) @abstractmethod - def get_fields(self: t.Self @ DatasetParser) -> list[Variable]: + def get_fields(self) -> list[Variable]: """Abstract method, must be implemented by subclasses.""" class DatasetParserParquet(DatasetParser): """Concrete implementation for parsing parquet files.""" - def __init__(self: t.Self @ DatasetParserParquet, dataset: str) -> None: + def __init__(self, dataset: pathlib.Path) -> None: """Use the super init method.""" super().__init__(dataset) - def get_fields(self: t.Self @ DatasetParserParquet) -> list[Variable]: + def get_fields(self) -> list[Variable]: """Extract the fields from this dataset.""" with self.dataset.open(mode="rb") as f: data_table = pq.read_table(f) @@ -175,16 +182,16 @@ def get_fields(self: t.Self @ DatasetParserParquet) -> list[Variable]: class DatasetParserSas7Bdat(DatasetParser): """Concrete implementation for parsing SAS7BDAT files.""" - def __init__(self: t.Self @ DatasetParserSas7Bdat, dataset: str) -> None: + def __init__(self, dataset: pathlib.Path) -> None: """Use the super init method.""" super().__init__(dataset) - def get_fields(self: t.Self @ DatasetParserSas7Bdat) -> list[Variable]: + def get_fields(self) -> list[Variable]: """Extract the fields from this dataset.""" fields = [] with self.dataset.open(mode="rb") as f: # Use an iterator to avoid reading in the entire dataset - sas_reader = pd.read_sas(f, format="sas7bdat", iterator=True) + sas_reader = pd.read_sas(f, format="sas7bdat", iterator=True) # type: ignore [call-overload] # Get the first row from the iterator try: diff --git a/datadoc/backend/model_backwards_compatibility.py b/src/datadoc/backend/model_backwards_compatibility.py similarity index 87% rename from datadoc/backend/model_backwards_compatibility.py rename to src/datadoc/backend/model_backwards_compatibility.py index 50ab7602..95012aae 100644 --- a/datadoc/backend/model_backwards_compatibility.py +++ b/src/datadoc/backend/model_backwards_compatibility.py @@ -11,14 +11,16 @@ from __future__ import annotations -import typing as t from collections import OrderedDict from dataclasses 
import dataclass -from datetime import datetime, timezone +from datetime import datetime +from datetime import timezone +from typing import TYPE_CHECKING +from typing import Any from datadoc_model.model import LanguageStringType -if t.TYPE_CHECKING: +if TYPE_CHECKING: from collections.abc import Callable VERSION_FIELD_NAME = "document_version" @@ -28,15 +30,15 @@ class UnknownModelVersionError(Exception): """Throw this error if we haven't seen the version before.""" def __init__( - self: t.Self @ UnknownModelVersionError, + self, supplied_version: str, - *args: tuple[t.Any, ...], + *args: tuple[Any, ...], ) -> None: """Initialize class.""" super().__init__(args) self.supplied_version = supplied_version - def __str__(self: t.Self @ UnknownModelVersionError) -> str: + def __str__(self) -> str: """Return string representation.""" return f"Document Version ({self.supplied_version}) of discovered file is not supported" @@ -49,19 +51,19 @@ class BackwardsCompatibleVersion: """A version which we support with backwards compatibility.""" version: str - handler: Callable + handler: Callable[[dict[str, Any]], dict[str, Any]] - def __post_init__(self: t.Self @ BackwardsCompatibleVersion) -> None: + def __post_init__(self) -> None: """Register this version in the supported versions map.""" SUPPORTED_VERSIONS[self.version] = self -def handle_current_version(supplied_metadata: dict) -> dict: +def handle_current_version(supplied_metadata: dict[str, Any]) -> dict[str, Any]: """Nothing to do here.""" return supplied_metadata -def handle_version_1_0_0(supplied_metadata: dict) -> dict: +def handle_version_1_0_0(supplied_metadata: dict[str, Any]) -> dict[str, Any]: """Handle breaking changes for v1.0.0.""" datetime_fields = [ ("metadata_created_date"), @@ -85,7 +87,7 @@ def handle_version_1_0_0(supplied_metadata: dict) -> dict: return supplied_metadata -def handle_version_0_1_1(supplied_metadata: dict) -> dict: +def handle_version_0_1_1(supplied_metadata: dict[str, Any]) -> dict[str, Any]: """Handle breaking changes for v0.1.1. PR ref: https://github.com/statisticsnorway/ssb-datadoc-model/pull/4. 
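The module above keeps one handler per historical document version: instantiating BackwardsCompatibleVersion registers it in SUPPORTED_VERSIONS through __post_init__, and upgrade_metadata then dispatches on the document_version field. A simplified, dependency-free sketch of that registry pattern (the handlers are dummies, and a plain ValueError stands in for UnknownModelVersionError):

from collections.abc import Callable
from dataclasses import dataclass
from typing import Any

VERSION_FIELD_NAME = "document_version"
SUPPORTED_VERSIONS: dict[str, "BackwardsCompatibleVersion"] = {}


@dataclass()
class BackwardsCompatibleVersion:
    version: str
    handler: Callable[[dict[str, Any]], dict[str, Any]]

    def __post_init__(self) -> None:
        # Creating an instance is all it takes to register the handler.
        SUPPORTED_VERSIONS[self.version] = self


def upgrade(metadata: dict[str, Any]) -> dict[str, Any]:
    supplied_version = metadata[VERSION_FIELD_NAME]
    try:
        return SUPPORTED_VERSIONS[supplied_version].handler(metadata)
    except KeyError as e:
        msg = f"Unsupported document version: {supplied_version}"
        raise ValueError(msg) from e


BackwardsCompatibleVersion("0.1.1", lambda m: {**m, VERSION_FIELD_NAME: "1.0.0"})
BackwardsCompatibleVersion("1.0.0", lambda m: m)

assert upgrade({VERSION_FIELD_NAME: "0.1.1"})[VERSION_FIELD_NAME] == "1.0.0"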
@@ -121,7 +123,7 @@ def handle_version_0_1_1(supplied_metadata: dict) -> dict: ) -def upgrade_metadata(fresh_metadata: dict) -> dict: +def upgrade_metadata(fresh_metadata: dict[str, Any]) -> dict[str, Any]: """Run the handler for this version to upgrade the document to the latest version.""" # Special case for current version, we expose the current_model_version parameter for test purposes supplied_version = fresh_metadata[VERSION_FIELD_NAME] diff --git a/datadoc/backend/storage_adapter.py b/src/datadoc/backend/storage_adapter.py similarity index 65% rename from datadoc/backend/storage_adapter.py rename to src/datadoc/backend/storage_adapter.py index a2310b0c..cd0a875e 100644 --- a/datadoc/backend/storage_adapter.py +++ b/src/datadoc/backend/storage_adapter.py @@ -7,11 +7,15 @@ import logging import pathlib -import typing as t -from urllib.parse import urlsplit, urlunsplit +from typing import TYPE_CHECKING +from typing import Protocol +from urllib.parse import urlsplit +from urllib.parse import urlunsplit -if t.TYPE_CHECKING: - from io import IOBase, TextIOWrapper +if TYPE_CHECKING: + import os + from io import IOBase + from io import TextIOWrapper GCS_PROTOCOL_PREFIX = "gs://" @@ -21,11 +25,12 @@ class GCSObject: """Implementation of the Protocol 'StorageAdapter' for Google Cloud Storage.""" - def __init__(self: t.Self @ GCSObject, path: str) -> None: + def __init__(self, path: str | os.PathLike) -> None: """Initialize the class.""" - self._url = urlsplit(path) + self._url = urlsplit(str(path)) try: - from dapla import AuthClient, FileClient + from dapla import AuthClient + from dapla import FileClient if AuthClient.is_ready(): # Running on Dapla, rely on dapla-toolbelt for auth @@ -44,21 +49,26 @@ def __init__(self: t.Self @ GCSObject, path: str) -> None: msg = "Missing support for GCS. Install datadoc with 'pip install ssb-datadoc[gcs]'" raise ImportError(msg) from e - def _rebuild_url(self: t.Self @ GCSObject, new_path: str | pathlib.Path) -> str: + @staticmethod + def for_path(path: str | os.PathLike) -> StorageAdapter: + """Return an instance of this class instantiated for path.""" + return GCSObject(path) + + def _rebuild_url(self, new_path: str | os.PathLike) -> str: return urlunsplit( (self._url.scheme, self._url.netloc, str(new_path), None, None), ) - def open(self: t.Self @ GCSObject, **kwargs: dict[str, t.Any]) -> IOBase: + def open(self, **kwargs: str) -> IOBase: """Return a file-like-object.""" return self.fs.open(self.location, **kwargs) - def parent(self: t.Self @ GCSObject) -> str: + def parent(self) -> str: """Return the logical parent of this object.""" - parent = pathlib.Path(self._url.path).parent + parent = pathlib.PurePosixPath(self._url.path).parent return self._rebuild_url(parent) - def joinpath(self: t.Self @ GCSObject, part: str) -> None: + def joinpath(self, part: str) -> None: """Join 'part' onto the current path. In-place operation. 
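Switching GCSObject.parent() to pathlib.PurePosixPath matters because GCS object names always use forward slashes, whatever the host OS. A standalone demonstration of the urlsplit / PurePosixPath / urlunsplit round trip, using the bucket path from the test resources:

from pathlib import PurePosixPath
from urllib.parse import urlsplit, urlunsplit

url = urlsplit(
    "gs://ssb-staging-dapla-felles-data-delt/datadoc/klargjorte_data/person_data_v1.parquet",
)
parent = PurePosixPath(url.path).parent  # '/datadoc/klargjorte_data', even on Windows
rebuilt = urlunsplit((url.scheme, url.netloc, str(parent), "", ""))

assert rebuilt == "gs://ssb-staging-dapla-felles-data-delt/datadoc/klargjorte_data"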
@@ -67,18 +77,18 @@ def joinpath(self: t.Self @ GCSObject, part: str) -> None: self._rebuild_url(pathlib.Path(self._url.path) / part), ) - def exists(self: t.Self @ GCSObject) -> bool: + def exists(self) -> bool: """Return True if the object exists.""" return self.fs.exists(self.location) - def write_text(self: t.Self @ GCSObject, text: str) -> None: + def write_text(self, text: str) -> None: """Write the given text to disk.""" f: TextIOWrapper with self.fs.open(self.location, mode="w") as f: f.write(text) @property - def location(self: t.Self @ GCSObject) -> str: + def location(self) -> str: """Return a locator for this object.""" return urlunsplit(self._url) @@ -86,75 +96,79 @@ def location(self: t.Self @ GCSObject) -> str: class LocalFile: """Implementation of the Protocol 'StorageAdapter' for file systems.""" - def __init__(self: t.Self @ LocalFile, path: str) -> None: + def __init__(self, path: str | os.PathLike) -> None: """Initialize the class.""" self._path_object: pathlib.Path = pathlib.Path(path) - def open(self: t.Self @ LocalFile, **kwargs: dict[str, t.Any]) -> IOBase: + @staticmethod + def for_path(path: str | os.PathLike) -> StorageAdapter: + """Return an instance of this class instantiated for path.""" + return LocalFile(path) + + def open(self, **kwargs: str) -> IOBase: """Return a file-like-object.""" - return pathlib.Path.open(self._path_object, **kwargs) + return pathlib.Path.open(self._path_object, **kwargs) # type: ignore [call-overload] - def parent(self: t.Self @ LocalFile) -> str: + def parent(self) -> str: """Return the parent of this file.""" return str(self._path_object.resolve().parent) - def joinpath(self: t.Self @ LocalFile, part: str) -> None: + def joinpath(self, part: str) -> None: """Join 'part' onto the current path. In-place operation. """ self._path_object = self._path_object.joinpath(part) - def exists(self: t.Self @ LocalFile) -> bool: + def exists(self) -> bool: """Return True if the file exists.""" return self._path_object.exists() - def write_text(self: t.Self @ LocalFile, text: str) -> None: + def write_text(self, text: str) -> None: """Write the given text to disk.""" self._path_object.write_text(text, encoding="utf-8") @property - def location(self: t.Self @ LocalFile) -> str: + def location(self) -> str: """Return a locator for this object.""" return str(self._path_object) -class StorageAdapter(t.Protocol): +class StorageAdapter(Protocol): """Implement this Protocol for the technologies on which we store datasets and metadata documents.""" @staticmethod - def for_path(path: str | pathlib.Path) -> StorageAdapter: + def for_path(path: str | os.PathLike) -> StorageAdapter: """Return a concrete class implementing this Protocol based on the structure of the path.""" - path = str(path) - if path.startswith(GCS_PROTOCOL_PREFIX): + if str(path).startswith(GCS_PROTOCOL_PREFIX): return GCSObject(path) return LocalFile(path) - def open(self: t.Self @ StorageAdapter, **kwargs: dict[str, t.Any]) -> IOBase: + def open(self, **kwargs: str) -> IOBase: """Return a file-like-object.""" ... - def parent(self: t.Self @ StorageAdapter) -> str: + def parent(self) -> str: """Return the logical parent of this instance.""" ... - def joinpath(self: t.Self @ StorageAdapter, part: str) -> None: + def joinpath(self, part: str) -> None: """Join 'part' onto the current path. In-place operation. """ ... - def exists(self: t.Self @ StorageAdapter) -> bool: + def exists(self) -> bool: """Return True if the object exists.""" ... 
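StorageAdapter is a typing.Protocol, so GCSObject and LocalFile satisfy it structurally without inheriting from it, and for_path simply picks an implementation from the path prefix. A minimal sketch of that shape with trimmed stand-in classes:

from typing import Protocol

GCS_PROTOCOL_PREFIX = "gs://"


class Locator(Protocol):  # stand-in for the StorageAdapter Protocol
    location: str


class GCSStub:
    def __init__(self, path: str) -> None:
        self.location = path


class LocalStub:
    def __init__(self, path: str) -> None:
        self.location = path


def for_path(path: str) -> Locator:
    """Choose an implementation from the path prefix, mirroring StorageAdapter.for_path."""
    return GCSStub(path) if str(path).startswith(GCS_PROTOCOL_PREFIX) else LocalStub(path)


assert isinstance(for_path("gs://bucket/data.parquet"), GCSStub)
assert isinstance(for_path("tests/resources/person_data_v1.parquet"), LocalStub)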
- def write_text(self: t.Self @ StorageAdapter, text: str) -> None: + def write_text(self, text: str) -> None: """Write the given text to disk.""" ... @property - def location(self: t.Self @ StorageAdapter) -> str: + def location(self) -> str: """Return a locator for this object.""" ... diff --git a/datadoc/enums.py b/src/datadoc/enums.py similarity index 95% rename from datadoc/enums.py rename to src/datadoc/enums.py index 252f6f77..4c408ba1 100644 --- a/datadoc/enums.py +++ b/src/datadoc/enums.py @@ -2,15 +2,10 @@ from __future__ import annotations from enum import Enum -from typing import TYPE_CHECKING from datadoc_model import model from datadoc_model.model import LanguageStringType -if TYPE_CHECKING: - # Avoid circular imports - from typing import Self - class SupportedLanguages(str, Enum): """The list of languages metadata may be recorded in. @@ -27,7 +22,7 @@ class LanguageStringsEnum(Enum): """Enum class for storing LanguageStringType objects.""" def __init__( - self: Self @ LanguageStringsEnum, + self, language_strings: LanguageStringType, ) -> None: """Store the LanguageStringType object for displaying enum values in multiple languages. @@ -40,10 +35,10 @@ def __init__( self.language_strings = language_strings @classmethod - def _missing_(cls: type[Self @ LanguageStringsEnum], value: str) -> Enum: + def _missing_(cls, value: object) -> LanguageStringsEnum: """Support constructing an enum member from a supplied name string.""" try: - member = cls._member_map_[value] + member: LanguageStringsEnum = cls._member_map_[str(value)] # type: ignore [assignment] except KeyError as e: # Raise the expected exception with a useful explanation message = f"{value} is not a valid {cls.__qualname__}" @@ -52,11 +47,11 @@ def _missing_(cls: type[Self @ LanguageStringsEnum], value: str) -> Enum: return member def get_value_for_language( - self: Self @ LanguageStringsEnum, + self, language: SupportedLanguages, ) -> str: """Retrieve the string for the relevant language.""" - return getattr(self.language_strings, language.value) + return str(getattr(self.language_strings, language.value)) class Assessment(LanguageStringsEnum): diff --git a/src/datadoc/frontend/__init__.py b/src/datadoc/frontend/__init__.py new file mode 100644 index 00000000..9fe2297d --- /dev/null +++ b/src/datadoc/frontend/__init__.py @@ -0,0 +1 @@ +"""Code relating to Dash and the user interface.""" diff --git a/datadoc/frontend/callbacks/__init__.py b/src/datadoc/frontend/callbacks/__init__.py similarity index 100% rename from datadoc/frontend/callbacks/__init__.py rename to src/datadoc/frontend/callbacks/__init__.py diff --git a/datadoc/frontend/callbacks/dataset.py b/src/datadoc/frontend/callbacks/dataset.py similarity index 75% rename from datadoc/frontend/callbacks/dataset.py rename to src/datadoc/frontend/callbacks/dataset.py index e9c1f24b..47be811a 100644 --- a/datadoc/frontend/callbacks/dataset.py +++ b/src/datadoc/frontend/callbacks/dataset.py @@ -10,28 +10,20 @@ from pydantic import ValidationError from datadoc import state -from datadoc.backend.datadoc_metadata import ( - METADATA_DOCUMENT_FILE_SUFFIX, - DataDocMetadata, -) -from datadoc.frontend.callbacks.utils import ( - MetadataInputTypes, - find_existing_language_string, - get_options_for_language, - update_global_language_state, -) -from datadoc.frontend.fields.display_dataset import ( - DISPLAYED_DATASET_METADATA, - DISPLAYED_DROPDOWN_DATASET_ENUMS, - MULTIPLE_LANGUAGE_DATASET_METADATA, - DatasetIdentifiers, -) +from datadoc.backend.datadoc_metadata import 
METADATA_DOCUMENT_FILE_SUFFIX +from datadoc.backend.datadoc_metadata import DataDocMetadata +from datadoc.frontend.callbacks.utils import MetadataInputTypes +from datadoc.frontend.callbacks.utils import find_existing_language_string +from datadoc.frontend.callbacks.utils import get_options_for_language +from datadoc.frontend.callbacks.utils import update_global_language_state +from datadoc.frontend.fields.display_dataset import DISPLAYED_DATASET_METADATA +from datadoc.frontend.fields.display_dataset import DISPLAYED_DROPDOWN_DATASET_ENUMS +from datadoc.frontend.fields.display_dataset import MULTIPLE_LANGUAGE_DATASET_METADATA +from datadoc.frontend.fields.display_dataset import DatasetIdentifiers if t.TYPE_CHECKING: from pathlib import Path - from datadoc_model import model - from datadoc.enums import SupportedLanguages logger = logging.getLogger(__name__) @@ -54,7 +46,7 @@ def get_dataset_path() -> str | Path | None: def open_file(file_path: str | Path | None = None) -> None: """Load the given dataset into an DataDocMetadata instance.""" - if file_path and file_path.endswith(METADATA_DOCUMENT_FILE_SUFFIX): + if file_path and str(file_path).endswith(METADATA_DOCUMENT_FILE_SUFFIX): state.metadata = DataDocMetadata(metadata_document_path=file_path) logger.info("Opened existing metadata document %s", file_path) else: @@ -66,7 +58,7 @@ def open_file(file_path: str | Path | None = None) -> None: def open_dataset_handling( n_clicks: int, file_path: str, -) -> tuple[bool, bool, str, SupportedLanguages]: +) -> tuple[bool, bool, str, str]: """Handle errors and other logic around opening a dataset file.""" try: open_file(file_path) @@ -90,37 +82,44 @@ def open_dataset_handling( return False, False, "", state.current_metadata_language.value -def process_keyword(value: str) -> list[str] | None: +def process_keyword(value: str) -> list[str]: """Convert a comma separated string to a list of strings. e.g. 'a,b ,c' -> ['a', 'b', 'c'] """ - if value is None: - return None return [item.strip() for item in value.split(",")] def process_special_cases( - value: str, + value: MetadataInputTypes, metadata_identifier: str, -) -> list[str] | model.LanguageStringType | None: +) -> MetadataInputTypes: """Pre-process metadata where needed. Some types of metadata need processing before being saved to the model. Handle these cases here, other values are returned unchanged. """ - if metadata_identifier == DatasetIdentifiers.KEYWORD.value: - value = process_keyword(value) - if metadata_identifier in MULTIPLE_LANGUAGE_DATASET_METADATA: - value = find_existing_language_string( + updated_value: MetadataInputTypes + if metadata_identifier == DatasetIdentifiers.KEYWORD.value and isinstance( + value, + str, + ): + updated_value = process_keyword(value) + elif metadata_identifier in MULTIPLE_LANGUAGE_DATASET_METADATA and isinstance( + value, + str, + ): + updated_value = find_existing_language_string( state.metadata.meta.dataset, value, metadata_identifier, ) + else: + updated_value = value # Other values get returned unchanged - return value + return updated_value def accept_dataset_metadata_input( @@ -170,7 +169,7 @@ def update_dataset_metadata_language() -> list[MetadataInputTypes]: def change_language_dataset_metadata( language: SupportedLanguages, -) -> tuple[tuple[list[dict[str, str]], ...], list]: +) -> tuple[object, ...]: """Change the language for the displayed dataset metadata. 
This is done in three steps: diff --git a/datadoc/frontend/callbacks/register_callbacks.py b/src/datadoc/frontend/callbacks/register_callbacks.py similarity index 89% rename from datadoc/frontend/callbacks/register_callbacks.py rename to src/datadoc/frontend/callbacks/register_callbacks.py index 909d1917..2c23f4ef 100644 --- a/datadoc/frontend/callbacks/register_callbacks.py +++ b/src/datadoc/frontend/callbacks/register_callbacks.py @@ -8,20 +8,23 @@ from typing import TYPE_CHECKING -from dash import ALL, Dash, Input, Output, State, ctx +from dash import ALL +from dash import Dash +from dash import Input +from dash import Output +from dash import State +from dash import ctx from datadoc import state from datadoc.enums import SupportedLanguages -from datadoc.frontend.callbacks.dataset import ( - accept_dataset_metadata_input, - change_language_dataset_metadata, - open_dataset_handling, -) +from datadoc.frontend.callbacks.dataset import accept_dataset_metadata_input +from datadoc.frontend.callbacks.dataset import change_language_dataset_metadata +from datadoc.frontend.callbacks.dataset import open_dataset_handling +from datadoc.frontend.callbacks.variables import accept_variable_metadata_input from datadoc.frontend.callbacks.variables import ( - accept_variable_metadata_input, update_variable_table_dropdown_options_for_language, - update_variable_table_language, ) +from datadoc.frontend.callbacks.variables import update_variable_table_language from datadoc.frontend.components.dataset_tab import DATASET_METADATA_INPUT from datadoc.frontend.fields.display_dataset import DISPLAYED_DROPDOWN_DATASET_METADATA @@ -80,7 +83,7 @@ def callback_save_metadata_file(n_clicks: int) -> bool: ) def callback_change_language_dataset_metadata( language: str, - ) -> tuple[tuple[list[dict[str, str]], ...], list]: + ) -> tuple[object, ...]: """Update dataset metadata values upon change of language.""" return change_language_dataset_metadata(SupportedLanguages(language)) @@ -138,7 +141,7 @@ def callback_variable_table( ) def callback_variable_table_dropdown_options( language: str, - ) -> dict[str, dict[str, list[dict[str, str]]]]: + ) -> dict[str, dict[str, object]]: """Update the options in variable table dropdowns when the language changes.""" language = SupportedLanguages(language) return update_variable_table_dropdown_options_for_language(language) @@ -154,7 +157,7 @@ def callback_variable_table_dropdown_options( def callback_open_dataset( n_clicks: int, dataset_path: str, - ) -> tuple[bool, bool, str, SupportedLanguages]: + ) -> tuple[bool, bool, str, str]: """Open a dataset. Shows an alert on success or failure. 
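The registrations above all follow the same Dash shape: a function decorated with the Output/Input/State wiring that returns one value per Output. A generic sketch, assuming Dash 2.x; the component ids below are invented for illustration, not the app's real ones:

from dash import Dash, Input, Output, State

app = Dash(__name__)


@app.callback(
    Output("opened-dataset-success", "is_open"),
    Output("opened-dataset-error", "is_open"),
    Input("open-button", "n_clicks"),
    State("dataset-path-input", "value"),
    prevent_initial_call=True,
)
def open_dataset(n_clicks: int, dataset_path: str) -> tuple[bool, bool]:
    """Return one value per Output: toggle the success or the error alert."""
    ok = bool(dataset_path)
    return ok, not ok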
diff --git a/datadoc/frontend/callbacks/utils.py b/src/datadoc/frontend/callbacks/utils.py similarity index 73% rename from datadoc/frontend/callbacks/utils.py rename to src/datadoc/frontend/callbacks/utils.py index 9224d7bf..66d575e8 100644 --- a/datadoc/frontend/callbacks/utils.py +++ b/src/datadoc/frontend/callbacks/utils.py @@ -4,10 +4,12 @@ import logging from typing import TYPE_CHECKING +from typing import TypeAlias from datadoc_model import model -from datadoc import enums, state +from datadoc import enums +from datadoc import state if TYPE_CHECKING: from enum import Enum @@ -19,7 +21,7 @@ logger = logging.getLogger(__name__) -MetadataInputTypes: type = str | int | float | bool | None +MetadataInputTypes: TypeAlias = str | list[str] | int | float | bool | None def update_global_language_state(language: SupportedLanguages) -> None: @@ -33,7 +35,13 @@ def get_language_strings_enum(enum: Enum) -> enums.LanguageStringsEnum: We need multiple languages to display in the front end, but the model only defines a single language in the enums. """ - return getattr(enums, enum.__name__) + language_strings_enum: enums.LanguageStringsEnum = getattr(enums, enum.__name__) # type: ignore [attr-defined] + if not issubclass(language_strings_enum, enums.LanguageStringsEnum): # type: ignore [arg-type] + message = f"Expected {language_strings_enum} to be a subclass of LanguageStringsEnum, but is {type(language_strings_enum)}" + raise TypeError( + message, + ) + return language_strings_enum def get_options_for_language( @@ -46,7 +54,7 @@ def get_options_for_language( "label": i.get_value_for_language(language), "value": i.name, } - for i in get_language_strings_enum(enum) + for i in get_language_strings_enum(enum) # type: ignore [attr-defined] ] @@ -54,7 +62,7 @@ def find_existing_language_string( metadata_model_object: pydantic.BaseModel, value: str, metadata_identifier: str, -) -> model.LanguageStringType | None: +) -> model.LanguageStringType: """Get or create a LanguageStrings object and return it.""" # In this case we need to set the string to the correct language code language_strings = getattr(metadata_model_object, metadata_identifier) diff --git a/datadoc/frontend/callbacks/variables.py b/src/datadoc/frontend/callbacks/variables.py similarity index 90% rename from datadoc/frontend/callbacks/variables.py rename to src/datadoc/frontend/callbacks/variables.py index 4556e019..6a361bed 100644 --- a/datadoc/frontend/callbacks/variables.py +++ b/src/datadoc/frontend/callbacks/variables.py @@ -3,24 +3,28 @@ from __future__ import annotations import logging +from typing import TYPE_CHECKING from pydantic import ValidationError from datadoc import state from datadoc.enums import SupportedLanguages -from datadoc.frontend.callbacks.utils import ( - MetadataInputTypes, - find_existing_language_string, - get_options_for_language, -) +from datadoc.frontend.callbacks.utils import MetadataInputTypes +from datadoc.frontend.callbacks.utils import find_existing_language_string +from datadoc.frontend.callbacks.utils import get_options_for_language from datadoc.frontend.fields.display_variables import ( DISPLAYED_DROPDOWN_VARIABLES_METADATA, - DISPLAYED_DROPDOWN_VARIABLES_TYPES, +) +from datadoc.frontend.fields.display_variables import DISPLAYED_DROPDOWN_VARIABLES_TYPES +from datadoc.frontend.fields.display_variables import ( MULTIPLE_LANGUAGE_VARIABLES_METADATA, - VariableIdentifiers, ) +from datadoc.frontend.fields.display_variables import VariableIdentifiers from datadoc.utils import get_display_values +if 
TYPE_CHECKING: + from datadoc_model import model + logger = logging.getLogger(__name__) @@ -75,7 +79,7 @@ def handle_multi_language_metadata( metadata_field: str, new_value: MetadataInputTypes, updated_row_id: str, -) -> str | None: +) -> MetadataInputTypes | model.LanguageStringType: """Handle updates to fields which support multiple languages.""" if new_value is None: # This edge case occurs when the user removes the text in an input field @@ -143,7 +147,7 @@ def accept_variable_metadata_input( def update_variable_table_dropdown_options_for_language( language: SupportedLanguages, -) -> dict[str, dict[str, list[dict[str, str]]]]: +) -> dict[str, dict[str, object]]: """Retrieve enum options for dropdowns in the Datatable. Handles the special case of boolean values which we represent in the Datatable @@ -161,7 +165,7 @@ def update_variable_table_dropdown_options_for_language( ... } """ - options = [] + options: list[dict[str, object]] = [] for field_type in DISPLAYED_DROPDOWN_VARIABLES_TYPES: value = ( get_boolean_options_for_language(language) diff --git a/datadoc/frontend/components/__init__.py b/src/datadoc/frontend/components/__init__.py similarity index 100% rename from datadoc/frontend/components/__init__.py rename to src/datadoc/frontend/components/__init__.py diff --git a/datadoc/frontend/components/alerts.py b/src/datadoc/frontend/components/alerts.py similarity index 88% rename from datadoc/frontend/components/alerts.py rename to src/datadoc/frontend/components/alerts.py index 861ad56d..d2062544 100644 --- a/datadoc/frontend/components/alerts.py +++ b/src/datadoc/frontend/components/alerts.py @@ -2,7 +2,8 @@ from __future__ import annotations -from datadoc.frontend.components.builders import AlertTypes, build_ssb_alert +from datadoc.frontend.components.builders import AlertTypes +from datadoc.frontend.components.builders import build_ssb_alert dataset_validation_error = build_ssb_alert( AlertTypes.WARNING, diff --git a/datadoc/frontend/components/builders.py b/src/datadoc/frontend/components/builders.py similarity index 98% rename from datadoc/frontend/components/builders.py rename to src/datadoc/frontend/components/builders.py index 4439231b..bd6a86d9 100644 --- a/datadoc/frontend/components/builders.py +++ b/src/datadoc/frontend/components/builders.py @@ -2,7 +2,8 @@ import re from dataclasses import dataclass -from enum import Enum, auto +from enum import Enum +from enum import auto import dash_bootstrap_components as dbc from dash import html diff --git a/datadoc/frontend/components/control_bars.py b/src/datadoc/frontend/components/control_bars.py similarity index 98% rename from datadoc/frontend/components/control_bars.py rename to src/datadoc/frontend/components/control_bars.py index b0b885f8..06d25884 100644 --- a/datadoc/frontend/components/control_bars.py +++ b/src/datadoc/frontend/components/control_bars.py @@ -3,7 +3,8 @@ from __future__ import annotations import dash_bootstrap_components as dbc -from dash import dcc, html +from dash import dcc +from dash import html from datadoc import state from datadoc.enums import SupportedLanguages diff --git a/datadoc/frontend/components/dataset_tab.py b/src/datadoc/frontend/components/dataset_tab.py similarity index 87% rename from datadoc/frontend/components/dataset_tab.py rename to src/datadoc/frontend/components/dataset_tab.py index 7470a47f..98211be1 100644 --- a/datadoc/frontend/components/dataset_tab.py +++ b/src/datadoc/frontend/components/dataset_tab.py @@ -6,12 +6,10 @@ from dash import html from 
datadoc.frontend.components.builders import build_ssb_styled_tab -from datadoc.frontend.fields.display_dataset import ( - NON_EDITABLE_DATASET_METADATA, - OBLIGATORY_EDITABLE_DATASET_METADATA, - OPTIONAL_DATASET_METADATA, - DisplayDatasetMetadata, -) +from datadoc.frontend.fields.display_dataset import NON_EDITABLE_DATASET_METADATA +from datadoc.frontend.fields.display_dataset import OBLIGATORY_EDITABLE_DATASET_METADATA +from datadoc.frontend.fields.display_dataset import OPTIONAL_DATASET_METADATA +from datadoc.frontend.fields.display_dataset import DisplayDatasetMetadata DATASET_METADATA_INPUT = "dataset-metadata-input" diff --git a/datadoc/frontend/components/variables_tab.py b/src/datadoc/frontend/components/variables_tab.py similarity index 94% rename from datadoc/frontend/components/variables_tab.py rename to src/datadoc/frontend/components/variables_tab.py index d0fce2c5..06dd07eb 100644 --- a/datadoc/frontend/components/variables_tab.py +++ b/src/datadoc/frontend/components/variables_tab.py @@ -3,14 +3,13 @@ from __future__ import annotations import dash_bootstrap_components as dbc -from dash import dash_table, html +from dash import dash_table +from dash import html from datadoc import state from datadoc.frontend.components.builders import build_ssb_styled_tab -from datadoc.frontend.fields.display_variables import ( - DISPLAY_VARIABLES, - VariableIdentifiers, -) +from datadoc.frontend.fields.display_variables import DISPLAY_VARIABLES +from datadoc.frontend.fields.display_variables import VariableIdentifiers from datadoc.utils import get_display_values diff --git a/datadoc/frontend/fields/__init__.py b/src/datadoc/frontend/fields/__init__.py similarity index 100% rename from datadoc/frontend/fields/__init__.py rename to src/datadoc/frontend/fields/__init__.py diff --git a/datadoc/frontend/fields/display_base.py b/src/datadoc/frontend/fields/display_base.py similarity index 92% rename from datadoc/frontend/fields/display_base.py rename to src/datadoc/frontend/fields/display_base.py index 8fd1bf2a..8f1bcf2a 100644 --- a/datadoc/frontend/fields/display_base.py +++ b/src/datadoc/frontend/fields/display_base.py @@ -4,8 +4,10 @@ import logging import typing as t -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any +from dataclasses import dataclass +from dataclasses import field +from typing import TYPE_CHECKING +from typing import Any from dash import dcc @@ -61,15 +63,16 @@ def get_multi_language_metadata(metadata: BaseModel, identifier: str) -> str | N value: LanguageStringType = getattr(metadata, identifier) if value is None: return value - return getattr(value, state.current_metadata_language) + return str(getattr(value, state.current_metadata_language)) def get_comma_separated_string(metadata: BaseModel, identifier: str) -> str: """Get a metadata value which is a list of strings from the model and convert it to a comma separated string.""" value: list[str] = getattr(metadata, identifier) - if value is None: + try: + return ", ".join(value) + except TypeError: return "" - return ", ".join(value) @dataclass diff --git a/datadoc/frontend/fields/display_dataset.py b/src/datadoc/frontend/fields/display_dataset.py similarity index 94% rename from datadoc/frontend/fields/display_dataset.py rename to src/datadoc/frontend/fields/display_dataset.py index 3f947ed6..e1ac0443 100644 --- a/datadoc/frontend/fields/display_dataset.py +++ b/src/datadoc/frontend/fields/display_dataset.py @@ -9,14 +9,12 @@ from dash import dcc from datadoc_model import model -from 
datadoc.frontend.fields.display_base import ( - DROPDOWN_KWARGS, - NUMBER_KWARGS, - DisplayDatasetMetadata, - get_comma_separated_string, - get_metadata_and_stringify, - get_multi_language_metadata, -) +from datadoc.frontend.fields.display_base import DROPDOWN_KWARGS +from datadoc.frontend.fields.display_base import NUMBER_KWARGS +from datadoc.frontend.fields.display_base import DisplayDatasetMetadata +from datadoc.frontend.fields.display_base import get_comma_separated_string +from datadoc.frontend.fields.display_base import get_metadata_and_stringify +from datadoc.frontend.fields.display_base import get_multi_language_metadata logger = logging.getLogger(__name__) @@ -252,16 +250,16 @@ class DatasetIdentifiers(str, Enum): + NON_EDITABLE_DATASET_METADATA ) -DISPLAYED_DROPDOWN_DATASET_METADATA = [ +DISPLAYED_DROPDOWN_DATASET_METADATA: list[DisplayDatasetMetadata] = [ m for m in DISPLAYED_DATASET_METADATA if m.component == dcc.Dropdown ] types = typing.get_type_hints(model.Dataset) -DISPLAYED_DROPDOWN_DATASET_ENUMS = [ +DISPLAYED_DROPDOWN_DATASET_ENUMS: list[Enum] = [ typing.get_args(types[m.identifier])[0] for m in DISPLAYED_DROPDOWN_DATASET_METADATA ] -OBLIGATORY_DATASET_METADATA = [ +OBLIGATORY_DATASET_METADATA_IDENTIFIERS: list[str] = [ m.identifier for m in DISPLAY_DATASET.values() if m.obligatory and m.editable ] diff --git a/datadoc/frontend/fields/display_variables.py b/src/datadoc/frontend/fields/display_variables.py similarity index 100% rename from datadoc/frontend/fields/display_variables.py rename to src/datadoc/frontend/fields/display_variables.py diff --git a/src/datadoc/py.typed b/src/datadoc/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/datadoc/state.py b/src/datadoc/state.py similarity index 74% rename from datadoc/state.py rename to src/datadoc/state.py index bfd048ff..48949b39 100644 --- a/datadoc/state.py +++ b/src/datadoc/state.py @@ -7,18 +7,16 @@ strategy must be modified, since users will modify each others data. See here: https://dash.plotly.com/sharing-data-between-callbacks """ +from __future__ import annotations from typing import TYPE_CHECKING -from datadoc.enums import SupportedLanguages - if TYPE_CHECKING: - # This is only needed for a type hint so we put the import inside - # this check to avoid circular imports from datadoc.backend.datadoc_metadata import DataDocMetadata + from datadoc.enums import SupportedLanguages # Global metadata container -metadata: "DataDocMetadata" +metadata: DataDocMetadata current_metadata_language: SupportedLanguages diff --git a/datadoc/utils.py b/src/datadoc/utils.py similarity index 90% rename from datadoc/utils.py rename to src/datadoc/utils.py index 2ae56a56..cc98e2ed 100644 --- a/datadoc/utils.py +++ b/src/datadoc/utils.py @@ -2,6 +2,7 @@ import datetime import importlib +from typing import Any from datadoc_model import model from pydantic import AnyUrl @@ -12,7 +13,7 @@ def running_in_notebook() -> bool: """Return True if running in Jupyter Notebook.""" try: - return get_ipython().__class__.__name__ == "ZMQInteractiveShell" + return bool(get_ipython().__class__.__name__ == "ZMQInteractiveShell") # type: ignore [name-defined] except NameError: # The get_ipython method is globally available in ipython interpreters # as used in Jupyter. 
However it is not available in other python @@ -29,7 +30,7 @@ def calculate_percentage(completed: int, total: int) -> int: def get_display_values( variable: model.Variable, current_language: SupportedLanguages, -) -> dict: +) -> dict[str, Any]: """Return a dictionary representation of Model.DataDocVariable with strings in the currently selected language.""" return_dict = {} for field_name, value in variable: @@ -55,7 +56,7 @@ def pick_random_port() -> int: return int(sock.getsockname()[1]) -def get_timestamp_now() -> datetime: +def get_timestamp_now() -> datetime.datetime: """Return a timestamp for the current moment.""" return datetime.datetime.now(tz=datetime.timezone.utc) diff --git a/datadoc/wsgi.py b/src/datadoc/wsgi.py similarity index 100% rename from datadoc/wsgi.py rename to src/datadoc/wsgi.py diff --git a/datadoc/tests/__init__.py b/tests/__init__.py similarity index 100% rename from datadoc/tests/__init__.py rename to tests/__init__.py diff --git a/datadoc/tests/conftest.py b/tests/conftest.py similarity index 72% rename from datadoc/tests/conftest.py rename to tests/conftest.py index f77d0135..09532c8f 100644 --- a/datadoc/tests/conftest.py +++ b/tests/conftest.py @@ -2,7 +2,9 @@ import shutil import traceback -from datetime import datetime, timezone +from collections.abc import Generator +from datetime import datetime +from datetime import timezone from pathlib import Path from unittest import mock @@ -14,14 +16,12 @@ from datadoc.backend.storage_adapter import StorageAdapter from datadoc.enums import SupportedLanguages -from .utils import ( - TEST_BUCKET_PARQUET_FILEPATH, - TEST_EXISTING_METADATA_DIRECTORY, - TEST_EXISTING_METADATA_FILE_NAME, - TEST_EXISTING_METADATA_WITH_VALID_ID_DIRECTORY, - TEST_PARQUET_FILEPATH, - TEST_RESOURCES_METADATA_DOCUMENT, -) +from .utils import TEST_BUCKET_PARQUET_FILEPATH +from .utils import TEST_EXISTING_METADATA_DIRECTORY +from .utils import TEST_EXISTING_METADATA_FILE_NAME +from .utils import TEST_EXISTING_METADATA_WITH_VALID_ID_DIRECTORY +from .utils import TEST_PARQUET_FILEPATH +from .utils import TEST_RESOURCES_METADATA_DOCUMENT @pytest.fixture() @@ -43,7 +43,7 @@ def metadata(_mock_timestamp: None) -> DataDocMetadata: @pytest.fixture() -def remove_document_file() -> None: +def remove_document_file() -> Generator[None, None, None]: # Yield so we only run teardown yield None try: @@ -80,35 +80,37 @@ def existing_metadata_with_valid_id_file(existing_metadata_file: Path) -> Path: @pytest.fixture() def _clear_state() -> None: """Global fixture, referred to in pytest.ini.""" - state.metadata = None + state.metadata = None # type: ignore [assignment] state.current_metadata_language = SupportedLanguages.NORSK_BOKMÅL @pytest.fixture() -def mock_gcsfs_open(mocker: MockerFixture): +def mock_gcsfs_open(mocker: MockerFixture) -> mock.Mock: return mocker.patch("gcsfs.GCSFileSystem.open") @pytest.fixture() -def mock_gcsfs_exists(mocker: MockerFixture): +def mock_gcsfs_exists(mocker: MockerFixture) -> mock.Mock: mock = mocker.patch("gcsfs.GCSFileSystem.exists") mock.return_value = True return mock @pytest.fixture() -def mock_pathlib_write_text(mocker: MockerFixture): +def mock_pathlib_write_text(mocker: MockerFixture) -> mock.Mock: return mocker.patch("pathlib.Path.write_text") @pytest.fixture() -def local_parquet_file(mock_pathlib_write_text: mock.patch): # noqa: ARG001 +def local_parquet_file( + mock_pathlib_write_text: mock.Mock, # noqa: ARG001 +) -> StorageAdapter: return StorageAdapter.for_path(str(TEST_PARQUET_FILEPATH)) @pytest.fixture() def 
bucket_object_parquet_file( - mock_gcsfs_open: mock.patch, # noqa: ARG001 - mock_gcsfs_exists: mock.patch, # noqa: ARG001 -): + mock_gcsfs_open: mock.Mock, # noqa: ARG001 + mock_gcsfs_exists: mock.Mock, # noqa: ARG001 +) -> StorageAdapter: return StorageAdapter.for_path(TEST_BUCKET_PARQUET_FILEPATH) diff --git a/datadoc/tests/pytest.ini b/tests/pytest.ini similarity index 100% rename from datadoc/tests/pytest.ini rename to tests/pytest.ini diff --git a/datadoc/tests/resources/existing_metadata_file/compatibility/README.md b/tests/resources/existing_metadata_file/compatibility/README.md similarity index 100% rename from datadoc/tests/resources/existing_metadata_file/compatibility/README.md rename to tests/resources/existing_metadata_file/compatibility/README.md diff --git a/datadoc/tests/resources/existing_metadata_file/compatibility/v0_1_1/person_data_v1__DOC.json b/tests/resources/existing_metadata_file/compatibility/v0_1_1/person_data_v1__DOC.json similarity index 100% rename from datadoc/tests/resources/existing_metadata_file/compatibility/v0_1_1/person_data_v1__DOC.json rename to tests/resources/existing_metadata_file/compatibility/v0_1_1/person_data_v1__DOC.json diff --git a/datadoc/tests/resources/existing_metadata_file/compatibility/v1_0_0/person_data_v1__DOC.json b/tests/resources/existing_metadata_file/compatibility/v1_0_0/person_data_v1__DOC.json similarity index 100% rename from datadoc/tests/resources/existing_metadata_file/compatibility/v1_0_0/person_data_v1__DOC.json rename to tests/resources/existing_metadata_file/compatibility/v1_0_0/person_data_v1__DOC.json diff --git a/datadoc/tests/resources/existing_metadata_file/invalid_id_field/person_data_v1__DOC.json b/tests/resources/existing_metadata_file/invalid_id_field/person_data_v1__DOC.json similarity index 100% rename from datadoc/tests/resources/existing_metadata_file/invalid_id_field/person_data_v1__DOC.json rename to tests/resources/existing_metadata_file/invalid_id_field/person_data_v1__DOC.json diff --git a/datadoc/tests/resources/existing_metadata_file/person_data_v1__DOC.json b/tests/resources/existing_metadata_file/person_data_v1__DOC.json similarity index 100% rename from datadoc/tests/resources/existing_metadata_file/person_data_v1__DOC.json rename to tests/resources/existing_metadata_file/person_data_v1__DOC.json diff --git a/datadoc/tests/resources/existing_metadata_file/valid_id_field/person_data_v1__DOC.json b/tests/resources/existing_metadata_file/valid_id_field/person_data_v1__DOC.json similarity index 100% rename from datadoc/tests/resources/existing_metadata_file/valid_id_field/person_data_v1__DOC.json rename to tests/resources/existing_metadata_file/valid_id_field/person_data_v1__DOC.json diff --git a/datadoc/tests/resources/klargjorte_data/befolkning/person_testdata_p2021-12-31_p2021-12-31_v1.parquet b/tests/resources/klargjorte_data/befolkning/person_testdata_p2021-12-31_p2021-12-31_v1.parquet similarity index 100% rename from datadoc/tests/resources/klargjorte_data/befolkning/person_testdata_p2021-12-31_p2021-12-31_v1.parquet rename to tests/resources/klargjorte_data/befolkning/person_testdata_p2021-12-31_p2021-12-31_v1.parquet diff --git a/datadoc/tests/resources/klargjorte_data/befolkning/person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json b/tests/resources/klargjorte_data/befolkning/person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json similarity index 100% rename from datadoc/tests/resources/klargjorte_data/befolkning/person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json rename to 
tests/resources/klargjorte_data/befolkning/person_testdata_p2021-12-31_p2021-12-31_v1__DOC.json diff --git a/datadoc/tests/resources/person_data.csv b/tests/resources/person_data.csv similarity index 100% rename from datadoc/tests/resources/person_data.csv rename to tests/resources/person_data.csv diff --git a/datadoc/tests/resources/person_data_v1.parquet b/tests/resources/person_data_v1.parquet similarity index 100% rename from datadoc/tests/resources/person_data_v1.parquet rename to tests/resources/person_data_v1.parquet diff --git a/datadoc/tests/resources/person_data_v1.parquet.gzip b/tests/resources/person_data_v1.parquet.gzip similarity index 100% rename from datadoc/tests/resources/person_data_v1.parquet.gzip rename to tests/resources/person_data_v1.parquet.gzip diff --git a/datadoc/tests/resources/sasdata.sas7bdat b/tests/resources/sasdata.sas7bdat similarity index 100% rename from datadoc/tests/resources/sasdata.sas7bdat rename to tests/resources/sasdata.sas7bdat diff --git a/datadoc/tests/test_callbacks.py b/tests/test_callbacks.py similarity index 87% rename from datadoc/tests/test_callbacks.py rename to tests/test_callbacks.py index 93b915ac..8fdb7973 100644 --- a/datadoc/tests/test_callbacks.py +++ b/tests/test_callbacks.py @@ -10,31 +10,26 @@ from datadoc import state from datadoc.backend.datadoc_metadata import DataDocMetadata -from datadoc.enums import ( - DatasetState, - DataType, - LanguageStringsEnum, - SupportedLanguages, -) -from datadoc.frontend.callbacks.dataset import ( - accept_dataset_metadata_input, - change_language_dataset_metadata, - update_dataset_metadata_language, - update_global_language_state, -) -from datadoc.frontend.callbacks.utils import ( - MetadataInputTypes, - find_existing_language_string, - get_language_strings_enum, -) +from datadoc.enums import DatasetState +from datadoc.enums import DataType +from datadoc.enums import LanguageStringsEnum +from datadoc.enums import SupportedLanguages +from datadoc.frontend.callbacks.dataset import accept_dataset_metadata_input +from datadoc.frontend.callbacks.dataset import change_language_dataset_metadata +from datadoc.frontend.callbacks.dataset import update_dataset_metadata_language +from datadoc.frontend.callbacks.dataset import update_global_language_state +from datadoc.frontend.callbacks.utils import MetadataInputTypes +from datadoc.frontend.callbacks.utils import find_existing_language_string +from datadoc.frontend.callbacks.utils import get_language_strings_enum +from datadoc.frontend.callbacks.utils import get_options_for_language +from datadoc.frontend.callbacks.variables import accept_variable_metadata_input from datadoc.frontend.callbacks.variables import ( - accept_variable_metadata_input, update_variable_table_dropdown_options_for_language, - update_variable_table_language, ) +from datadoc.frontend.callbacks.variables import update_variable_table_language from datadoc.frontend.fields.display_dataset import DISPLAYED_DROPDOWN_DATASET_ENUMS from datadoc.frontend.fields.display_variables import VariableIdentifiers -from datadoc.tests.utils import TEST_PARQUET_FILEPATH +from tests.utils import TEST_PARQUET_FILEPATH DATA_ORIGINAL = [ { @@ -288,7 +283,7 @@ def test_change_language_dataset_metadata(): ], ) def test_get_language_strings_enum(model_enum: Enum): - assert issubclass(get_language_strings_enum(model_enum), LanguageStringsEnum) + assert issubclass(get_language_strings_enum(model_enum), LanguageStringsEnum) # type: ignore [arg-type] def test_get_language_strings_enum_unknown(): @@ -299,3 
+294,15 @@ class TestEnum(Enum): with pytest.raises(AttributeError): get_language_strings_enum(TestEnum) + + +@pytest.mark.parametrize( + "enum", + DISPLAYED_DROPDOWN_DATASET_ENUMS, +) +@pytest.mark.parametrize("language", list(SupportedLanguages)) +def test_get_options_for_language(language: SupportedLanguages, enum: Enum): + for o in get_options_for_language(language, enum): + assert list(o.keys()) == ["label", "value"] + assert isinstance(o["label"], str) + assert isinstance(o["value"], str) diff --git a/datadoc/tests/test_datadoc_metadata.py b/tests/test_datadoc_metadata.py similarity index 82% rename from datadoc/tests/test_datadoc_metadata.py rename to tests/test_datadoc_metadata.py index ae18c83c..24a1dc04 100644 --- a/datadoc/tests/test_datadoc_metadata.py +++ b/tests/test_datadoc_metadata.py @@ -2,33 +2,34 @@ from __future__ import annotations import json +import pathlib from copy import copy -from pathlib import Path, PurePath +from pathlib import Path +from pathlib import PurePath from typing import TYPE_CHECKING from uuid import UUID import pytest -from datadoc_model.model import ( - DatadocJsonSchema, - Dataset, - Variable, -) +from datadoc_model.model import DatadocJsonSchema +from datadoc_model.model import Dataset +from datadoc_model.model import Variable -from datadoc.backend.datadoc_metadata import PLACEHOLDER_USERNAME, DataDocMetadata -from datadoc.enums import DatasetState, DataType, VariableRole +from datadoc.backend.datadoc_metadata import PLACEHOLDER_USERNAME +from datadoc.backend.datadoc_metadata import DataDocMetadata +from datadoc.enums import DatasetState +from datadoc.enums import DataType +from datadoc.enums import VariableRole -from .utils import ( - TEST_EXISTING_METADATA_DIRECTORY, - TEST_EXISTING_METADATA_FILE_NAME, - TEST_PARQUET_FILEPATH, - TEST_RESOURCES_DIRECTORY, -) +from .utils import TEST_EXISTING_METADATA_DIRECTORY +from .utils import TEST_EXISTING_METADATA_FILE_NAME +from .utils import TEST_PARQUET_FILEPATH +from .utils import TEST_RESOURCES_DIRECTORY if TYPE_CHECKING: from datetime import datetime -def make_paths() -> list[tuple[str, DatasetState]]: +def make_paths() -> list[tuple[str, DatasetState | None]]: split_path = list(PurePath(TEST_PARQUET_FILEPATH).parts) initial_data = [ ("kildedata", DatasetState.SOURCE_DATA), @@ -45,8 +46,8 @@ def make_paths() -> list[tuple[str, DatasetState]]: for to_insert, state in initial_data: new_path = copy(split_path) new_path.insert(-2, to_insert) - new_path = PurePath().joinpath(*new_path) - test_data.append((str(new_path), state)) + joined_path = PurePath().joinpath(*new_path) + test_data.append((str(joined_path), state)) return test_data @@ -57,14 +58,10 @@ def test_get_dataset_state( expected_result: DatasetState, metadata: DataDocMetadata, ): - actual_state = metadata.get_dataset_state(path) + actual_state = metadata.get_dataset_state(pathlib.Path(path)) assert actual_state == expected_result -def test_get_dataset_state_none(metadata: DataDocMetadata): - assert metadata.get_dataset_state(None) is None - - @pytest.mark.usefixtures("existing_metadata_file", "remove_document_file") def test_existing_metadata_file( metadata: DataDocMetadata, @@ -86,12 +83,20 @@ def test_metadata_document_percent_complete(metadata: DataDocMetadata): assert metadata.percent_complete == 17 # noqa: PLR2004 -def test_get_dataset_version(metadata: DataDocMetadata): - assert metadata.get_dataset_version(metadata.short_name) == "1" - - -def test_get_dataset_version_unknown(metadata: DataDocMetadata): - assert 
metadata.get_dataset_version("person_data.parquet") is None +@pytest.mark.parametrize( + ("short_name", "expected"), + [ + ("person_data_v1", "1"), + ("person_data_v2", "2"), + ("person_data", None), + ("person_testdata_p2021-12-31_p2021-12-31_v20", "20"), + ], +) +def test_get_dataset_version( + short_name: str, + expected: str | None, +): + assert DataDocMetadata.get_dataset_version(short_name) == expected @pytest.mark.usefixtures("remove_document_file") @@ -156,10 +161,8 @@ def test_existing_metadata_none_id( existing_metadata_file: str, metadata: DataDocMetadata, ): - pre_open_id = "" - post_write_id = "" with Path.open(Path(existing_metadata_file)) as f: - pre_open_id = json.load(f)["datadoc"]["dataset"]["id"] + pre_open_id: None = json.load(f)["datadoc"]["dataset"]["id"] assert pre_open_id is None assert isinstance(metadata.meta.dataset.id, UUID) metadata.write_metadata_document() diff --git a/datadoc/tests/test_dataset_parser.py b/tests/test_dataset_parser.py similarity index 73% rename from datadoc/tests/test_dataset_parser.py rename to tests/test_dataset_parser.py index a0cf6b8c..580595bf 100644 --- a/datadoc/tests/test_dataset_parser.py +++ b/tests/test_dataset_parser.py @@ -1,25 +1,25 @@ """Tests for the DatasetParser class.""" +import pathlib + import pytest -from datadoc_model.model import LanguageStringType, Variable +from datadoc_model.model import LanguageStringType +from datadoc_model.model import Variable from datadoc import state -from datadoc.backend.dataset_parser import ( - KNOWN_BOOLEAN_TYPES, - KNOWN_DATETIME_TYPES, - KNOWN_FLOAT_TYPES, - KNOWN_INTEGER_TYPES, - KNOWN_STRING_TYPES, - DatasetParser, - DatasetParserParquet, -) -from datadoc.enums import DataType, SupportedLanguages +from datadoc.backend.dataset_parser import KNOWN_BOOLEAN_TYPES +from datadoc.backend.dataset_parser import KNOWN_DATETIME_TYPES +from datadoc.backend.dataset_parser import KNOWN_FLOAT_TYPES +from datadoc.backend.dataset_parser import KNOWN_INTEGER_TYPES +from datadoc.backend.dataset_parser import KNOWN_STRING_TYPES +from datadoc.backend.dataset_parser import DatasetParser +from datadoc.backend.dataset_parser import DatasetParserParquet +from datadoc.enums import DataType +from datadoc.enums import SupportedLanguages -from .utils import ( - TEST_PARQUET_FILEPATH, - TEST_PARQUET_GZIP_FILEPATH, - TEST_SAS7BDAT_FILEPATH, -) +from .utils import TEST_PARQUET_FILEPATH +from .utils import TEST_PARQUET_GZIP_FILEPATH +from .utils import TEST_SAS7BDAT_FILEPATH def test_use_abstract_class_directly(): @@ -76,14 +76,10 @@ def test_get_fields_sas7bdat(): assert fields == expected_fields -def test_get_fields_unknown_file_type(): - with pytest.raises(NotImplementedError): - DatasetParser.for_file("my_dataset.csv").get_fields() - - -def test_get_fields_no_extension_provided(): +@pytest.mark.parametrize("file", ["my_dataset.csv", "my_dataset.xlsx", "my_dataset"]) +def test_dataset_parser_unsupported_files(file: pathlib.Path): with pytest.raises(NotImplementedError): - DatasetParser.for_file("my_dataset").get_fields() + DatasetParser.for_file(pathlib.Path(file)) def test_transform_datatype_unknown_type(): diff --git a/datadoc/tests/test_model.py b/tests/test_model.py similarity index 74% rename from datadoc/tests/test_model.py rename to tests/test_model.py index a79a9a3b..9b1e94ea 100644 --- a/datadoc/tests/test_model.py +++ b/tests/test_model.py @@ -1,12 +1,12 @@ """Verify that we are in sync with the Model.""" -from datadoc_model.model import Dataset, Variable +from datadoc_model.model import Dataset +from 
datadoc_model.model import Variable -from datadoc.frontend.fields.display_dataset import DISPLAY_DATASET, DatasetIdentifiers -from datadoc.frontend.fields.display_variables import ( - DISPLAY_VARIABLES, - VariableIdentifiers, -) +from datadoc.frontend.fields.display_dataset import DISPLAY_DATASET +from datadoc.frontend.fields.display_dataset import DatasetIdentifiers +from datadoc.frontend.fields.display_variables import DISPLAY_VARIABLES +from datadoc.frontend.fields.display_variables import VariableIdentifiers def test_dataset_metadata_definition_parity(): diff --git a/datadoc/tests/test_model_backwards_compatibility.py b/tests/test_model_backwards_compatibility.py similarity index 90% rename from datadoc/tests/test_model_backwards_compatibility.py rename to tests/test_model_backwards_compatibility.py index 1da89468..264f63d6 100644 --- a/datadoc/tests/test_model_backwards_compatibility.py +++ b/tests/test_model_backwards_compatibility.py @@ -6,10 +6,8 @@ import pytest from datadoc.backend.datadoc_metadata import DataDocMetadata -from datadoc.backend.model_backwards_compatibility import ( - UnknownModelVersionError, - upgrade_metadata, -) +from datadoc.backend.model_backwards_compatibility import UnknownModelVersionError +from datadoc.backend.model_backwards_compatibility import upgrade_metadata from .utils import TEST_COMPATIBILITY_DIRECTORY diff --git a/datadoc/tests/test_smoke.py b/tests/test_smoke.py similarity index 100% rename from datadoc/tests/test_smoke.py rename to tests/test_smoke.py diff --git a/datadoc/tests/test_storage_adapter.py b/tests/test_storage_adapter.py similarity index 61% rename from datadoc/tests/test_storage_adapter.py rename to tests/test_storage_adapter.py index bb1ffcda..1c31b870 100644 --- a/datadoc/tests/test_storage_adapter.py +++ b/tests/test_storage_adapter.py @@ -4,34 +4,40 @@ import pytest -from datadoc.backend.storage_adapter import GCSObject, LocalFile, StorageAdapter -from datadoc.tests.utils import TEST_BUCKET_PARQUET_FILEPATH, TEST_PARQUET_FILEPATH +from datadoc.backend.storage_adapter import GCSObject +from datadoc.backend.storage_adapter import LocalFile +from datadoc.backend.storage_adapter import StorageAdapter +from tests.utils import TEST_BUCKET_PARQUET_FILEPATH +from tests.utils import TEST_PARQUET_FILEPATH @pytest.mark.parametrize( - ("file", "expected_class"), + ("file_name", "expected_class"), [("local_parquet_file", LocalFile), ("bucket_object_parquet_file", GCSObject)], ) def test_factory( - file: StorageAdapter, - expected_class: StorageAdapter, + file_name: str, + expected_class: type[StorageAdapter], request: pytest.FixtureRequest, ): # Ugly pytest magic to get the actual fixture out - file: StorageAdapter = request.getfixturevalue(file) + file: StorageAdapter = request.getfixturevalue(file_name) assert isinstance(file, expected_class) -@pytest.mark.parametrize("file", ["local_parquet_file", "bucket_object_parquet_file"]) -def test_open(file: str, request: pytest.FixtureRequest): +@pytest.mark.parametrize( + "file_name", + ["local_parquet_file", "bucket_object_parquet_file"], +) +def test_open(file_name: str, request: pytest.FixtureRequest): # Ugly pytest magic to get the actual fixture out - file: StorageAdapter = request.getfixturevalue(file) + file: StorageAdapter = request.getfixturevalue(file_name) with file.open() as file_handle: assert file_handle.readable() @pytest.mark.parametrize( - ("file", "expected_parent"), + ("file_name", "expected_parent"), [ ( "local_parquet_file", @@ -43,14 +49,14 @@ def test_open(file: str, 
request: pytest.FixtureRequest): ), ], ) -def test_parent(file: str, expected_parent: str, request: pytest.FixtureRequest): +def test_parent(file_name: str, expected_parent: str, request: pytest.FixtureRequest): # Ugly pytest magic to get the actual fixture out - file: StorageAdapter = request.getfixturevalue(file) + file: StorageAdapter = request.getfixturevalue(file_name) assert file.parent() == expected_parent @pytest.mark.parametrize( - ("known_file", "expected"), + ("file_name", "expected"), [ ( "local_parquet_file", @@ -63,23 +69,23 @@ def test_parent(file: str, expected_parent: str, request: pytest.FixtureRequest) ], ) def test_joinpath( - known_file: str, + file_name: str, expected: str, request: pytest.FixtureRequest, ): # Ugly pytest magic to get the actual fixture out - actual_file: StorageAdapter = request.getfixturevalue(known_file) + actual_file: StorageAdapter = request.getfixturevalue(file_name) actual_file.joinpath("extra") assert pathlib.Path(actual_file.location) == pathlib.Path(expected) @pytest.mark.parametrize( - "known_file", + "file_name", ["local_parquet_file", "bucket_object_parquet_file"], ) -def test_exists(known_file: str, request: pytest.FixtureRequest): +def test_exists(file_name: str, request: pytest.FixtureRequest): # Ugly pytest magic to get the actual fixture out - actual_file: StorageAdapter = request.getfixturevalue(known_file) + actual_file: StorageAdapter = request.getfixturevalue(file_name) assert actual_file.exists() @@ -90,7 +96,3 @@ def test_write_text_local_file( local_parquet_file.write_text("12345") mock = request.getfixturevalue("mock_pathlib_write_text") mock.assert_called_once_with("12345", encoding="utf-8") - - -# Currently no test for writing to GCS -# Attempts to mock are failing with TypeError: cannot set 'write' attribute of immutable type '_io.BufferedWriter' diff --git a/datadoc/tests/test_utils.py b/tests/test_utils.py similarity index 67% rename from datadoc/tests/test_utils.py rename to tests/test_utils.py index 2ab96e63..b6417355 100644 --- a/datadoc/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,8 +3,11 @@ from datadoc_model.model import Variable from datadoc.enums import SupportedLanguages -from datadoc.tests.test_callbacks import BOKMÅL_NAME, LANGUAGE_OBJECT -from datadoc.utils import calculate_percentage, get_display_values, running_in_notebook +from datadoc.utils import calculate_percentage +from datadoc.utils import get_display_values +from datadoc.utils import running_in_notebook +from tests.test_callbacks import BOKMÅL_NAME +from tests.test_callbacks import LANGUAGE_OBJECT def test_not_running_in_notebook(): diff --git a/datadoc/tests/utils.py b/tests/utils.py similarity index 94% rename from datadoc/tests/utils.py rename to tests/utils.py index 05ca1684..92c629ae 100644 --- a/datadoc/tests/utils.py +++ b/tests/utils.py @@ -4,7 +4,7 @@ TEST_BUCKET_PARQUET_FILEPATH = "gs://ssb-staging-dapla-felles-data-delt/datadoc/klargjorte_data/person_data_v1.parquet" -TEST_RESOURCES_DIRECTORY = Path("datadoc/tests/resources/") +TEST_RESOURCES_DIRECTORY = Path("tests/resources/") TEST_PARQUET_FILEPATH = TEST_RESOURCES_DIRECTORY / "person_data_v1.parquet" TEST_SAS7BDAT_FILEPATH = TEST_RESOURCES_DIRECTORY / "sasdata.sas7bdat"
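The storage-adapter tests above parametrize over fixture names (plain strings) and resolve them at runtime with request.getfixturevalue. The same trick in a self-contained form, with trivial stand-in fixtures:

import pytest


@pytest.fixture()
def local_path() -> str:
    return "tests/resources/person_data_v1.parquet"


@pytest.fixture()
def bucket_path() -> str:
    return "gs://ssb-staging-dapla-felles-data-delt/datadoc/klargjorte_data/person_data_v1.parquet"


@pytest.mark.parametrize(
    ("fixture_name", "expected_prefix"),
    [("local_path", "tests/"), ("bucket_path", "gs://")],
)
def test_paths(fixture_name: str, expected_prefix: str, request: pytest.FixtureRequest):
    # Parametrized values are strings, so look the actual fixture up by name.
    path: str = request.getfixturevalue(fixture_name)
    assert path.startswith(expected_prefix)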