# feat: better evaluation scripts

# Workflow file for this run

# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
# Test workflow: for every OS / Python combination of the matrix below it
# installs the dependencies, lints (flake8), checks formatting (black),
# type-checks (mypy), runs pytest with coverage and uploads the coverage
# report to Codecov.
name: Tests

# NOTE(review): the trigger names were restored from the two `branches`
# filters found in the file (push + pull_request on main) — confirm.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  # NOTE(review): the job id could not be recovered from the file; `build`
  # is a placeholder — confirm against the repository.
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      # keep the other matrix entries running when one of them fails
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12-dev']
        include:
          # custom tests
          # other OS version necessary
          # NOTE(review): GitHub appears to have retired the ubuntu-20.04
          # hosted image — confirm these two entries still get a runner.
          - os: ubuntu-20.04
            python-version: '3.6'
          - os: ubuntu-20.04
            python-version: '3.7'
          # common versions on MacOS
          - os: macos-latest
            python-version: '3.7'
          - os: macos-latest
            python-version: '3.8'
          # common versions on Windows
          # NOTE(review): no visible key reads `experimental` or
          # `allowed_failure`; a job-level
          # `continue-on-error: ${{ matrix.experimental == true }}` would be
          # needed for these flags to have any effect — confirm the intent.
          - os: windows-latest
            python-version: '3.7'
            experimental: true
            allowed_failure: true
          - os: windows-latest
            python-version: '3.8'
            experimental: true
            allowed_failure: true
    steps:
      # repository checkout: must run before the cache step, otherwise
      # hashFiles() sees an empty workspace and the cache key never changes
      - uses: actions/checkout@v3
      # Python and pip setup
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: python -m pip install --upgrade pip
      - name: Get pip cache dir
        id: pip-cache
        # bash on every OS so the redirection below also works on Windows
        shell: bash
        # writes the step output `dir` (replaces the deprecated ::set-output)
        run: |
          echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT"
      - name: pip cache
        uses: actions/cache@v4
        with:
          path: ${{ steps.pip-cache.outputs.dir }}
          # the glob covers requirements-dev.txt and training/requirements.txt
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      # package setup
      - name: Install dependencies
        if: matrix.python-version != '3.6' && matrix.python-version != '3.7'
        run: pip install -r requirements-dev.txt
      - name: Install training dependencies
        if: matrix.python-version != '3.6' && matrix.python-version != '3.7'
        run: pip install -r training/requirements.txt
      # 3.6 and 3.7 skip the requirements files and install the tools directly
      - name: Install dependencies (legacy versions)
        if: matrix.python-version == '3.6' || matrix.python-version == '3.7'
        run: |
          python -m pip install --upgrade flake8
          python -m pip install --upgrade black
          python -m pip install --upgrade mypy
          python -m pip install --upgrade pytest pytest-cov
      # tests
      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
      - name: Code format with black
        run: black --check --diff simplemma training tests
      - name: Type checking with mypy
        if: matrix.python-version != '3.6'
        run: mypy -p simplemma -p training -p tests
      - name: Test with pytest
        run: pytest --cov=./ --cov-report=xml
      # coverage
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          fail_ci_if_error: true
          files: ./coverage.xml
          verbose: true