From 327ebc426e32f2086c8b2af98d563d30e2da8f5d Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Tue, 2 Jan 2024 10:31:38 -0800 Subject: [PATCH 1/6] update to ppt 1.4.5 --- .copier-answers.yml | 9 +- .github/pull_request_template.md | 63 ++++++++++++++ .github/workflows/asv-main.yml | 101 ++++++++++++++++++++++ .github/workflows/asv-nightly.yml | 93 ++++++++++++++++++++ .github/workflows/asv-pr.yml | 101 ++++++++++++++++++++++ .github/workflows/build-documentation.yml | 9 +- .github/workflows/linting.yml | 10 ++- .github/workflows/pre-commit-ci.yml | 34 ++++++++ .github/workflows/publish-to-pypi.yml | 10 +-- .github/workflows/smoke-test.yml | 10 +++ .gitignore | 6 ++ .pre-commit-config.yaml | 48 +++++----- .prepare_project.sh | 25 ++++++ .readthedocs.yml | 4 +- LICENSE | 2 +- benchmarks/__init__.py | 0 benchmarks/asv.conf.json | 77 +++++++++++++++++ benchmarks/benchmarks.py | 16 ++++ docs/index.rst | 37 ++++++++ docs/requirements.txt | 3 +- pyproject.toml | 20 ++++- src/tape/example_benchmarks.py | 14 +++ 22 files changed, 647 insertions(+), 45 deletions(-) create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/asv-main.yml create mode 100644 .github/workflows/asv-nightly.yml create mode 100644 .github/workflows/asv-pr.yml create mode 100644 .github/workflows/pre-commit-ci.yml create mode 100644 .prepare_project.sh create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/asv.conf.json create mode 100644 benchmarks/benchmarks.py create mode 100644 src/tape/example_benchmarks.py diff --git a/.copier-answers.yml b/.copier-answers.yml index 5099344e..61d5ec87 100644 --- a/.copier-answers.yml +++ b/.copier-answers.yml @@ -1,15 +1,18 @@ # Changes here will be overwritten by Copier -_commit: v1.3.3 +_commit: v1.4.5 _src_path: gh:lincc-frameworks/python-project-template author_email: brantd@uw.edu author_name: Doug Branton -create_example_module: true +create_example_module: false custom_install: true +include_benchmarks: 
true +include_docs: true include_notebooks: true -module_name: tape mypy_type_checking: none +package_name: tape preferred_linter: black project_license: MIT project_name: tape +project_organization: lincc-frameworks use_gitlfs: none use_isort: false diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..76e043ca --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,63 @@ + + +## Change Description + +- [ ] My PR includes a link to the issue that I am addressing + + + +## Solution Description + + + + +## Code Quality +- [ ] I have read the Contribution Guide +- [ ] My code follows the code style of this project +- [ ] My code builds (or compiles) cleanly without any errors or warnings +- [ ] My code contains relevant comments and necessary documentation + +## Project-Specific Pull Request Checklists + + +### Bug Fix Checklist +- [ ] My fix includes a new test that breaks as a result of the bug (if possible) +- [ ] My change includes a breaking change + - [ ] My change includes backwards compatibility and deprecation warnings (if possible) + +### New Feature Checklist +- [ ] I have added or updated the docstrings associated with my feature using the [NumPy docstring format](https://numpydoc.readthedocs.io/en/latest/format.html) +- [ ] I have updated the tutorial to highlight my new feature (if appropriate) +- [ ] I have added unit/End-to-End (E2E) test cases to cover my new feature +- [ ] My change includes a breaking change + - [ ] My change includes backwards compatibility and deprecation warnings (if possible) + +### Documentation Change Checklist +- [ ] Any updated docstrings use the [NumPy docstring format](https://numpydoc.readthedocs.io/en/latest/format.html) + +### Build/CI Change Checklist +- [ ] If required or optional dependencies have changed (including version numbers), I have updated the README to reflect this +- [ ] If this is a new CI setup, I have added the associated badge to 
the README + + + +### Other Change Checklist +- [ ] Any new or updated docstrings use the [NumPy docstring format](https://numpydoc.readthedocs.io/en/latest/format.html). +- [ ] I have updated the tutorial to highlight my new feature (if appropriate) +- [ ] I have added unit/End-to-End (E2E) test cases to cover any changes +- [ ] My change includes a breaking change + - [ ] My change includes backwards compatibility and deprecation warnings (if possible) diff --git a/.github/workflows/asv-main.yml b/.github/workflows/asv-main.yml new file mode 100644 index 00000000..f9fd7004 --- /dev/null +++ b/.github/workflows/asv-main.yml @@ -0,0 +1,101 @@ +# This workflow will run benchmarks with airspeed velocity (asv), +# store the new results in the "benchmarks" branch and publish them +# to a dashboard on GH Pages. + +name: Run ASV benchmarks for main + +on: + push: + branches: [ main ] + +env: + PYTHON_VERSION: "3.10" + WORKING_DIR: ${{ github.workspace }}/benchmarks + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + + setup-python: + runs-on: ubuntu-latest + + steps: + - name: Cache Python ${{ env.PYTHON_VERSION }} + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: python-${{ env.PYTHON_VERSION }} + + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v4 + with: + python-version: "${{ env.PYTHON_VERSION }}" + + asv-main: + runs-on: ubuntu-latest + needs: setup-python + + permissions: + contents: write + + defaults: + run: + working-directory: ${{ env.WORKING_DIR }} + + steps: + - name: Checkout main branch of the repository + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Cache Python ${{ env.PYTHON_VERSION }} + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: python-${{ env.PYTHON_VERSION }} + + - name: Install dependencies + run: | + sudo apt-get update + python -m pip install --upgrade pip + pip install asv==0.6.1 virtualenv tabulate + + - name: Configure 
git + run: | + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + - name: Create ASV machine config file + run: asv machine --machine gh-runner --yes + + - name: Fetch previous results from the "benchmarks" branch + run: | + if git ls-remote --exit-code origin benchmarks > /dev/null 2>&1; then + git merge origin/benchmarks \ + --allow-unrelated-histories \ + --no-commit + mv ../_results . + fi + + - name: Run ASV for the main branch + run: asv run ALL --skip-existing --verbose || true + + - name: Submit new results to the "benchmarks" branch + uses: JamesIves/github-pages-deploy-action@v4 + with: + branch: benchmarks + folder: ${{ env.WORKING_DIR }}/_results + target-folder: _results + + - name: Generate dashboard HTML + run: | + asv show + asv publish + + - name: Deploy to Github pages + uses: JamesIves/github-pages-deploy-action@v4 + with: + branch: gh-pages + folder: ${{ env.WORKING_DIR }}/_html \ No newline at end of file diff --git a/.github/workflows/asv-nightly.yml b/.github/workflows/asv-nightly.yml new file mode 100644 index 00000000..fa8012cd --- /dev/null +++ b/.github/workflows/asv-nightly.yml @@ -0,0 +1,93 @@ +# This workflow will run daily at 06:45. +# It will run benchmarks with airspeed velocity (asv) +# and compare performance with the previous nightly build. 
+ +name: Run benchmarks nightly job + +on: + schedule: + - cron: 45 6 * * * + workflow_dispatch: + +env: + PYTHON_VERSION: "3.10" + WORKING_DIR: ${{ github.workspace }}/benchmarks + NIGHTLY_HASH_FILE: nightly-hash + +jobs: + + asv-nightly: + runs-on: ubuntu-latest + + defaults: + run: + working-directory: ${{ env.WORKING_DIR }} + + steps: + - name: Checkout main branch of the repository + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Cache Python ${{ env.PYTHON_VERSION }} + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: python-${{ env.PYTHON_VERSION }} + + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v4 + with: + python-version: "${{ env.PYTHON_VERSION }}" + + - name: Install dependencies + run: | + sudo apt-get update + python -m pip install --upgrade pip + pip install asv==0.6.1 virtualenv + + - name: Configure git + run: | + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + - name: Create ASV machine config file + run: asv machine --machine gh-runner --yes + + - name: Fetch previous results from the "benchmarks" branch + run: | + if git ls-remote --exit-code origin benchmarks > /dev/null 2>&1; then + git merge origin/benchmarks \ + --allow-unrelated-histories \ + --no-commit + mv ../_results . 
+ fi + + - name: Get nightly dates under comparison + id: nightly-dates + run: | + echo "yesterday=$(date -d yesterday +'%Y-%m-%d')" >> $GITHUB_OUTPUT + echo "today=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + + - name: Use last nightly commit hash from cache + uses: actions/cache@v3 + with: + path: ${{ env.WORKING_DIR }} + key: nightly-results-${{ steps.nightly-dates.outputs.yesterday }} + + - name: Run comparison of main against last nightly build + run: | + HASH_FILE=${{ env.NIGHTLY_HASH_FILE }} + CURRENT_HASH=${{ github.sha }} + if [ -f $HASH_FILE ]; then + PREV_HASH=$(cat $HASH_FILE) + asv continuous $PREV_HASH $CURRENT_HASH --verbose || true + asv compare $PREV_HASH $CURRENT_HASH --sort ratio --verbose + fi + echo $CURRENT_HASH > $HASH_FILE + + - name: Update last nightly hash in cache + uses: actions/cache@v3 + with: + path: ${{ env.WORKING_DIR }} + key: nightly-results-${{ steps.nightly-dates.outputs.today }} \ No newline at end of file diff --git a/.github/workflows/asv-pr.yml b/.github/workflows/asv-pr.yml new file mode 100644 index 00000000..bef22084 --- /dev/null +++ b/.github/workflows/asv-pr.yml @@ -0,0 +1,101 @@ +# This workflow will run benchmarks with airspeed velocity (asv) for pull requests. +# It will compare the performance of the main branch with the performance of the merge +# with the new changes and publish a comment with this assessment. 
+ +name: Run ASV benchmarks for PR + +on: + pull_request: + branches: [ main ] + +env: + PYTHON_VERSION: "3.10" + WORKING_DIR: ${{ github.workspace }}/benchmarks + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + + setup-python: + runs-on: ubuntu-latest + + steps: + - name: Cache Python ${{ env.PYTHON_VERSION }} + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: python-${{ env.PYTHON_VERSION }} + + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v4 + with: + python-version: "${{ env.PYTHON_VERSION }}" + + asv-pr: + runs-on: ubuntu-latest + needs: setup-python + + permissions: + actions: read + pull-requests: write + + defaults: + run: + working-directory: ${{ env.WORKING_DIR }} + + steps: + - name: Checkout PR branch of the repository + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Cache Python ${{ env.PYTHON_VERSION }} + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: python-${{ env.PYTHON_VERSION }} + + - name: Install dependencies + run: | + sudo apt-get update + python -m pip install --upgrade pip + pip install asv==0.6.1 virtualenv tabulate lf-asv-formatter + + - name: Get current job logs URL + uses: Tiryoh/gha-jobid-action@v0 + id: jobs + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + job_name: ${{ github.job }} + + - name: Create ASV machine config file + run: asv machine --machine gh-runner --yes + + - name: Run comparison of PR against main branch + run: | + git remote add upstream https://github.com/${{ github.repository }}.git + git fetch upstream + asv continuous upstream/main HEAD --verbose || true + asv compare upstream/main HEAD --sort ratio --verbose | tee output + python -m lf_asv_formatter --asv_version "$(echo asv --version)" + printf "\n\nClick [here]($STEP_URL) to view all benchmarks." 
>> output + env: + STEP_URL: "${{ steps.jobs.outputs.html_url }}#step:8:1" + + - name: Find benchmarks comment + uses: peter-evans/find-comment@v2 + id: find-comment + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 'github-actions[bot]' + body-includes: view all benchmarks + + - name: Create or update benchmarks comment + uses: peter-evans/create-or-update-comment@v3 + with: + comment-id: ${{ steps.find-comment.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body-path: ${{ env.WORKING_DIR }}/output + edit-mode: replace \ No newline at end of file diff --git a/.github/workflows/build-documentation.yml b/.github/workflows/build-documentation.yml index ae6d1f76..35e5f72b 100644 --- a/.github/workflows/build-documentation.yml +++ b/.github/workflows/build-documentation.yml @@ -9,6 +9,10 @@ on: pull_request: branches: [ main ] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: build: @@ -24,12 +28,11 @@ jobs: run: | sudo apt-get update python -m pip install --upgrade pip + if [ -f docs/requirements.txt ]; then pip install -r docs/requirements.txt; fi pip install . 
- pip install .[dev] - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Install notebook requirements run: | sudo apt-get install pandoc - name: Build docs run: | - sphinx-build -T -E -b html -d docs/build/doctrees ./docs docs/build/html \ No newline at end of file + sphinx-build -T -E -b html -d docs/build/doctrees ./docs docs/build/html diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index ef0010d1..8fbcc0f5 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -9,6 +9,10 @@ on: pull_request: branches: [ main ] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: build: runs-on: ubuntu-latest @@ -18,10 +22,10 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} + - name: Set up Python uses: actions/setup-python@v4 with: - python-version: ${{ matrix.python-version }} + python-version: '3.10' - name: Install dependencies run: | sudo apt-get update @@ -30,8 +34,6 @@ jobs: pip install .[dev] if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Analyze code with linter - uses: psf/black@stable with: src: ./src - diff --git a/.github/workflows/pre-commit-ci.yml b/.github/workflows/pre-commit-ci.yml new file mode 100644 index 00000000..8397877c --- /dev/null +++ b/.github/workflows/pre-commit-ci.yml @@ -0,0 +1,34 @@ +# This workflow runs pre-commit hooks on pull requests to enforce coding style. 
+# To ensure correct configuration, please refer to: +# https://lincc-ppt.readthedocs.io/en/latest/practices/ci_precommit.html + +name: Run pre-commit hooks + +on: + pull_request: + +jobs: + pre-commit-ci: + runs-on: ubuntu-latest + env: + SKIP: "check-lincc-frameworks-template-version,pytest-check,no-commit-to-branch,validate-pyproject,check-added-large-files,sphinx-build" + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install dependencies + run: | + sudo apt-get update + python -m pip install --upgrade pip + pip install . + pip install .[dev] + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - uses: pre-commit/action@v3.0.0 + with: + extra_args: --from-ref ${{ github.event.pull_request.base.sha }} --to-ref ${{ github.event.pull_request.head.sha }} + - uses: pre-commit-ci/lite-action@v1.0.1 + if: always() \ No newline at end of file diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index cfd43b62..e95231d0 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -1,5 +1,5 @@ # This workflow will upload a Python Package using Twine when a release is created -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries +# For more information see: https://github.com/pypa/gh-action-pypi-publish#trusted-publishing # This workflow uses actions that are not certified by GitHub. 
# They are provided by a third-party and are governed by @@ -19,7 +19,8 @@ jobs: deploy: runs-on: ubuntu-latest - + permissions: + id-token: write steps: - uses: actions/checkout@v3 - name: Set up Python @@ -33,7 +34,4 @@ jobs: - name: Build package run: python -m build - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/smoke-test.yml b/.github/workflows/smoke-test.yml index a3e58965..beef88bf 100644 --- a/.github/workflows/smoke-test.yml +++ b/.github/workflows/smoke-test.yml @@ -1,11 +1,18 @@ # This workflow will run daily at 06:45. # It will install Python dependencies and run tests with a variety of Python versions. +# See documentation for help debugging smoke test issues: +# https://lincc-ppt.readthedocs.io/en/latest/practices/ci_testing.html#version-culprit name: Unit test smoke test on: + + # Runs this workflow automatically schedule: - cron: 45 6 * * * + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: jobs: build: @@ -28,6 +35,9 @@ jobs: pip install . pip install .[dev] if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: List dependencies + run: | + pip list - name: Run unit tests with pytest run: | python -m pytest tests diff --git a/.gitignore b/.gitignore index f83e6018..8baa3870 100644 --- a/.gitignore +++ b/.gitignore @@ -141,4 +141,10 @@ dask-worker-space/ # tmp directory tmp/ + +# Mac OS .DS_Store + +# Airspeed Velocity performance results +_results/ +_html/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2bbc4637..a5c887ae 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,10 +1,11 @@ +fail_fast: true repos: # Compare the local template version to the latest remote template version # This hook should always pass. 
It will print a message if the local version # is out of date. - repo: https://github.com/lincc-frameworks/pre-commit-hooks - rev: v0.1 + rev: v0.1.1 hooks: - id: check-lincc-frameworks-template-version name: Check template version @@ -22,22 +23,7 @@ repos: language: system entry: jupyter nbconvert --clear-output - # Run unit tests, verify that they pass. Note that coverage is run against - # the ./src directory here because that is what will be committed. In the - # github workflow script, the coverage is run against the installed package - # and uploaded to Codecov by calling pytest like so: - # `python -m pytest --cov= --cov-report=xml` - - repo: local - hooks: - - id: pytest-check - name: Run unit tests - description: Run unit tests with pytest. - entry: bash -c "if python -m pytest --co -qq; then python -m pytest --cov=./src --cov-report=html; fi" - language: system - pass_filenames: false - always_run: true - - # prevents committing directly branches named 'main' and 'master'. + # Prevents committing directly branches named 'main' and 'master'. - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: @@ -49,7 +35,7 @@ repos: description: Prevent the user from committing very large files. args: ['--maxkb=500'] - # verify that pyproject.toml is well formed + # Verify that pyproject.toml is well formed - repo: https://github.com/abravalheri/validate-pyproject rev: v0.12.1 hooks: @@ -58,13 +44,15 @@ repos: description: Verify that pyproject.toml adheres to the established schema. + + # Analyze the code style and report code that doesn't adhere. 
- repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.7.0 hooks: - - id: black - types: [python] - files: ^(src|tests)/ + - id: black-jupyter + name: Format code using black + types_or: [python, pyi, jupyter] # It is recommended to specify the latest version of Python # supported by your project here, or alternatively use # pre-commit's default_language_version, see @@ -72,6 +60,22 @@ repos: language_version: python3.10 + + + # Run unit tests, verify that they pass. Note that coverage is run against + # the ./src directory here because that is what will be committed. In the + # github workflow script, the coverage is run against the installed package + # and uploaded to Codecov by calling pytest like so: + # `python -m pytest --cov= --cov-report=xml` + - repo: local + hooks: + - id: pytest-check + name: Run unit tests + description: Run unit tests with pytest. + entry: bash -c "if python -m pytest --co -qq; then python -m pytest --cov=./src --cov-report=html; fi" + language: system + pass_filenames: false + always_run: true # Make sure Sphinx can build the documentation while explicitly omitting # notebooks from the docs, so users don't have to wait through the execution # of each notebook or each commit. By default, these will be checked in the diff --git a/.prepare_project.sh b/.prepare_project.sh new file mode 100644 index 00000000..17b1b5e8 --- /dev/null +++ b/.prepare_project.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +echo "Initializing local git repository" +{ + gitversion=( $(git version | sed 's/^.* //;s/\./ /g') ) + if let "${gitversion[0]}<2"; then + # manipulate directly + git init . && echo 'ref: refs/heads/main' >.git/HEAD + elif let "${gitversion[0]}==2 & ${gitversion[1]}<34"; then + # rename master to main + git init . 
&& { git branch -m master main 2>/dev/null || true; }; + else + # set the initial branch name to main + git init --initial-branch=main >/dev/null + fi +} > /dev/null + +echo "Installing package and runtime dependencies in local environment" +pip install -e . > /dev/null + +echo "Installing developer dependencies in local environment" +pip install -e .'[dev]' > /dev/null + +echo "Installing pre-commit" +pre-commit install > /dev/null diff --git a/.readthedocs.yml b/.readthedocs.yml index 817d2b0b..79bfc272 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,6 +1,6 @@ # .readthedocs.yml # Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 @@ -19,4 +19,4 @@ python: install: - requirements: docs/requirements.txt - method: pip - path: . \ No newline at end of file + path: . diff --git a/LICENSE b/LICENSE index b30da8c7..eb11b2a7 100644 --- a/LICENSE +++ b/LICENSE @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json new file mode 100644 index 00000000..465e6da1 --- /dev/null +++ b/benchmarks/asv.conf.json @@ -0,0 +1,77 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + // The name of the project being benchmarked. + "project": "tape", + // The project's homepage. 
+ "project_url": "https://github.com/lincc-frameworks/tape", + // The URL or local path of the source code repository for the + // project being benchmarked. + "repo": "..", + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "tip" (for mercurial). + "branches": [ + "HEAD" + ], + "build_command": [ + "python -m build --wheel -o {build_cache_dir} {build_dir}" + ], + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + "dvcs": "git", + // The tool to use to create environments. May be "conda", + // "virtualenv" or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "virtualenv", + // the base URL to show a commit for the project. + "show_commit_url": "https://github.com/lincc-frameworks/tape/commit", + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + "pythons": [ + "3.10" + ], + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list indicates to just test against the default (latest) + // version. + "matrix": { + "Cython": [], + "build": [], + "packaging": [] + }, + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks". + "benchmark_dir": ".", + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env". + "env_dir": "env", + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". 
+ "results_dir": "_results", + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + "html_dir": "_html", + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + // `asv` will cache wheels of the recent builds in each + // environment, making them faster to install next time. This is + // number of builds to keep, per environment. + "build_cache_size": 8 + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching to benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. + // + // "regressions_first_commits": { + // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "another_benchmark": null, // Skip regression detection altogether + // } +} \ No newline at end of file diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py new file mode 100644 index 00000000..ca3ef325 --- /dev/null +++ b/benchmarks/benchmarks.py @@ -0,0 +1,16 @@ +"""Two sample benchmarks to compute runtime and memory usage. + +For more information on writing benchmarks: +https://asv.readthedocs.io/en/stable/writing_benchmarks.html.""" + +import example_benchmarks + + +def time_computation(): + """Time computations are prefixed with 'time'.""" + example_benchmarks.runtime_computation() + + +def mem_list(): + """Memory computations are prefixed with 'mem' or 'peakmem'.""" + return example_benchmarks.memory_computation() diff --git a/docs/index.rst b/docs/index.rst index 60c4d4dc..e7c0bf63 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,6 +26,43 @@ API Reference section. 
+Dev Guide - Getting Started +--------------------------- + +Before installing any dependencies or writing code, it's a great idea to create a +virtual environment. LINCC-Frameworks engineers primarily use `conda` to manage virtual +environments. If you have conda installed locally, you can run the following to +create and activate a new environment. + +.. code-block:: bash + + >> conda create env -n python=3.10 + >> conda activate + + +Once you have created a new environment, you can install this project for local +development using the following commands: + +.. code-block:: bash + + >> pip install -e .'[dev]' + >> pre-commit install + >> conda install pandoc + + +Notes: + +1) The single quotes around ``'[dev]'`` may not be required for your operating system. +2) ``pre-commit install`` will initialize pre-commit for this local repository, so + that a set of tests will be run prior to completing a local commit. For more + information, see the Python Project Template documentation on + `pre-commit `_. +3) Installing ``pandoc`` allows you to verify that automatic rendering of Jupyter notebooks + into documentation for ReadTheDocs works as expected. For more information, see + the Python Project Template documentation on + `Sphinx and Python Notebooks `_. + + .. 
toctree:: :hidden: diff --git a/docs/requirements.txt b/docs/requirements.txt index a1b35287..e97c0699 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,5 +1,5 @@ sphinx -sphinx_rtd_theme +sphinx-rtd-theme sphinx-autoapi nbsphinx ipython @@ -8,3 +8,4 @@ jupyter matplotlib eztao ray +numpy diff --git a/pyproject.toml b/pyproject.toml index a81dd19f..849a862b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dev = [ "pytest-cov", # Used to report total code coverage "pre-commit", # Used to run checks before finalizing a git commit "sphinx", # Used to automatically generate documentation - "sphinx_rtd_theme", # Used to render documentation + "sphinx-rtd-theme", # Used to render documentation "sphinx-autoapi", # Used to automatically generate api documentation "black", # Used for static linting of files # if you add dependencies here while experimenting in a notebook and you @@ -49,16 +49,22 @@ dev = [ "matplotlib", # Used in sample notebook intro_notebook.ipynb "eztao==0.4.1", # Used in Structure Function example notebook "bokeh", # Used to render dask client dashboard in Scaling to Large Data notebook - "ray[default]" # Used in the Ray on Ensemble notebook + "ray[default]", # Used in the Ray on Ensemble notebook + "asv==0.6.1", # Used to compute performance benchmarks ] [project.urls] "Source code" = "https://github.com/lincc-frameworks/tape" "Documentation" = "https://tape.readthedocs.io/" +# TODO: Get this working or just chuck it +# [metadata] +# long_description = { file = "README.md" } +# url = "https://github.com/lincc-frameworks/tape" + [build-system] requires = [ - "setuptools>=45", # Used to build and package the Python project + "setuptools>=62", # Used to build and package the Python project "setuptools_scm>=6.2", # Gets release version from git. 
Makes it available programmatically ] build-backend = "setuptools.build_meta" @@ -66,7 +72,15 @@ build-backend = "setuptools.build_meta" [tool.setuptools_scm] write_to = "src/tape/_version.py" +[tool.pytest.ini_options] +testpaths = [ + "tests", +] + [tool.black] line-length = 110 +target-version = ["py38"] +[tool.coverage.run] +omit=["src/tape/_version.py"] diff --git a/src/tape/example_benchmarks.py b/src/tape/example_benchmarks.py new file mode 100644 index 00000000..5a77b06c --- /dev/null +++ b/src/tape/example_benchmarks.py @@ -0,0 +1,14 @@ +"""An example module containing simplistic methods under benchmarking.""" + +import random +import time + + +def runtime_computation(): + """Runtime computation consuming between 0 and 5 seconds.""" + time.sleep(random.uniform(0, 5)) + + +def memory_computation(): + """Memory computation for a random list up to 512 samples.""" + return [0] * random.randint(0, 512) From 8809eeb44b62e33e923c06ef7601a57b9b0c58fd Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Tue, 2 Jan 2024 11:02:06 -0800 Subject: [PATCH 2/6] try basic workflow --- benchmarks/benchmarks.py | 62 +++++++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index ca3ef325..fc78df25 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -3,14 +3,62 @@ For more information on writing benchmarks: https://asv.readthedocs.io/en/stable/writing_benchmarks.html.""" -import example_benchmarks +#import example_benchmarks +import numpy as np +import pandas as pd +from tape.ensemble import Ensemble -def time_computation(): - """Time computations are prefixed with 'time'.""" - example_benchmarks.runtime_computation() +#def time_computation(): +# """Time computations are prefixed with 'time'.""" +# example_benchmarks.runtime_computation() -def mem_list(): - """Memory computations are prefixed with 'mem' or 'peakmem'.""" - return 
example_benchmarks.memory_computation() +#def mem_list(): +# """Memory computations are prefixed with 'mem' or 'peakmem'.""" +# return example_benchmarks.memory_computation() + + +def time_basic_workflow(): + np.random.seed(1) + + # Generate 10 astronomical objects + n_obj = 10 + ids = 8000 + np.arange(n_obj) + names = ids.astype(str) + object_table = pd.DataFrame( + { + "id": ids, + "name": names, + "ddf_bool": np.random.randint(0, 2, n_obj), # 0 if from deep drilling field, 1 otherwise + "libid_cadence": np.random.randint(1, 130, n_obj), + } + ) + + # Create 1000 lightcurves with 100 measurements each + lc_len = 100 + num_points = 1000 + all_bands = np.array(["r", "g", "b", "i"]) + source_table = pd.DataFrame( + { + "id": 8000 + (np.arange(num_points) % n_obj), + "time": np.arange(num_points), + "flux": np.random.random_sample(size=num_points)*10, + "band": np.repeat(all_bands, num_points / len(all_bands)), + "error": np.random.random_sample(size=num_points), + "count": np.arange(num_points), + }, + ) + + ens = Ensemble() # initialize an ensemble object + + # Read in the generated lightcurve data + ens.from_pandas( + source_frame=source_table, + object_frame=object_table, + id_col="id", + time_col="time", + flux_col="flux", + err_col="error", + band_col="band", + npartitions=1) From f983c5c867e2e671bc079db22ae470f5edcd7d01 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Tue, 2 Jan 2024 11:06:58 -0800 Subject: [PATCH 3/6] remove example file --- benchmarks/benchmarks.py | 12 ------------ src/tape/example_benchmarks.py | 14 -------------- 2 files changed, 26 deletions(-) delete mode 100644 src/tape/example_benchmarks.py diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index fc78df25..8672bd2e 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -3,22 +3,10 @@ For more information on writing benchmarks: https://asv.readthedocs.io/en/stable/writing_benchmarks.html.""" -#import example_benchmarks import numpy as np import pandas as 
pd from tape.ensemble import Ensemble - -#def time_computation(): -# """Time computations are prefixed with 'time'.""" -# example_benchmarks.runtime_computation() - - -#def mem_list(): -# """Memory computations are prefixed with 'mem' or 'peakmem'.""" -# return example_benchmarks.memory_computation() - - def time_basic_workflow(): np.random.seed(1) diff --git a/src/tape/example_benchmarks.py b/src/tape/example_benchmarks.py deleted file mode 100644 index 5a77b06c..00000000 --- a/src/tape/example_benchmarks.py +++ /dev/null @@ -1,14 +0,0 @@ -"""An example module containing simplistic methods under benchmarking.""" - -import random -import time - - -def runtime_computation(): - """Runtime computation consuming between 0 and 5 seconds.""" - time.sleep(random.uniform(0, 5)) - - -def memory_computation(): - """Memory computation for a random list up to 512 samples.""" - return [0] * random.randint(0, 512) From 92e63d9629026df38e0762274336efed406cb1a8 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Tue, 2 Jan 2024 11:36:39 -0800 Subject: [PATCH 4/6] add batch and prune/sync benchmarks --- benchmarks/benchmarks.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 8672bd2e..2cd48ea8 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -3,10 +3,45 @@ For more information on writing benchmarks: https://asv.readthedocs.io/en/stable/writing_benchmarks.html.""" +import os import numpy as np import pandas as pd +import tape from tape.ensemble import Ensemble + +TESTDATA_PATH = os.path.join(os.path.dirname(__file__), "..", "tests", "tape_tests", "data") + + +def load_parquet_data(): + return tape.read_parquet( + source_file=os.path.join(TESTDATA_PATH, "source", "test_source.parquet"), + object_file=os.path.join(TESTDATA_PATH, "object", "test_object.parquet"), + dask_client=False, + id_col="ps1_objid", + time_col="midPointTai", + band_col="filterName", + 
flux_col="psFlux", + err_col="psFluxErr", + ) + + +def time_batch(): + """Time a simple batch command""" + ens = load_parquet_data() + + res = ens.batch(np.mean, "psFlux") + res.compute() + + +def time_prune_sync_workflow(): + """Test a filter (using prune) -> sync workflow""" + ens = load_parquet_data() + + ens.prune(50) # calc nobs -> cut any object with nobs<50 + ens.source.head(5) # should call sync implicitly + + def time_basic_workflow(): np.random.seed(1) From e96f3c3bda566987faf206e4450b50a9b1a5a62b Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Tue, 2 Jan 2024 11:44:24 -0800 Subject: [PATCH 5/6] remove initial benchmark --- benchmarks/benchmarks.py | 45 ---------------------------------------- 1 file changed, 45 deletions(-) diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 2cd48ea8..6aa54c4e 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -40,48 +40,3 @@ def time_prune_sync_workflow(): ens.prune(50) # calc nobs -> cut any object with nobs<50 ens.source.head(5) # should call sync implicitly - - -def time_basic_workflow(): - np.random.seed(1) - - # Generate 10 astronomical objects - n_obj = 10 - ids = 8000 + np.arange(n_obj) - names = ids.astype(str) - object_table = pd.DataFrame( - { - "id": ids, - "name": names, - "ddf_bool": np.random.randint(0, 2, n_obj), # 0 if from deep drilling field, 1 otherwise - "libid_cadence": np.random.randint(1, 130, n_obj), - } - ) - - # Create 1000 lightcurves with 100 measurements each - lc_len = 100 - num_points = 1000 - all_bands = np.array(["r", "g", "b", "i"]) - source_table = pd.DataFrame( - { - "id": 8000 + (np.arange(num_points) % n_obj), - "time": np.arange(num_points), - "flux": np.random.random_sample(size=num_points)*10, - "band": np.repeat(all_bands, num_points / len(all_bands)), - "error": np.random.random_sample(size=num_points), - "count": np.arange(num_points), - }, - ) - - ens = Ensemble() # initialize an ensemble object - - # Read in the generated lightcurve 
data - ens.from_pandas( - source_frame=source_table, - object_frame=object_table, - id_col="id", - time_col="time", - flux_col="flux", - err_col="error", - band_col="band", - npartitions=1) From 58f79d7b71abea055a85d6e02d46e2ab4deca5d3 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Tue, 2 Jan 2024 14:22:29 -0800 Subject: [PATCH 6/6] review tweaks --- .github/pull_request_template.md | 3 +-- benchmarks/benchmarks.py | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 76e043ca..e0847d86 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -26,12 +26,11 @@ If it fixes an open issue, please link to the issue here. If this PR closes an i ## Code Quality -- [ ] I have read the Contribution Guide -- [ ] My code follows the code style of this project - [ ] My code builds (or compiles) cleanly without any errors or warnings - [ ] My code contains relevant comments and necessary documentation ## Project-Specific Pull Request Checklists +- [ ] I have added a function that requires a sync_tables command, and have added the necessary sync_tables call ### Bug Fix Checklist diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 6aa54c4e..7622da84 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -1,13 +1,11 @@ -"""Two sample benchmarks to compute runtime and memory usage. +"""A suite of TAPE Benchmarks. For more information on writing benchmarks: https://asv.readthedocs.io/en/stable/writing_benchmarks.html.""" import os import numpy as np -import pandas as pd import tape -from tape.ensemble import Ensemble TESTDATA_PATH = os.path.join(os.path.dirname(__file__), "..", "tests", "tape_tests", "data")