data_update_v2.yml
name: data.v2.json update
on:
  schedule:
    # Run at 3 AM PDT every day
    - cron: '0 10 * * *'
  workflow_dispatch: {}
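  # workflow_dispatch (above) also lets this update be kicked off manually from the
  # repository's Actions tab.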
jobs:
  # This workflow contains a single job called "build"
  build:
    runs-on: ubuntu-latest
    steps:
      # Check out the main repo
      - name: Checkout Main repo
        uses: actions/checkout@v2
        with:
          ssh-key: ${{ secrets.data_daily_update }}
          path: site
      # Check out the data scraper repo
      - name: Checkout Data Scraper
        uses: actions/checkout@v2
        with:
          ssh-key: ${{ secrets.data_daily_update }}
          repository: sfbrigade/data-covid19-sfbayarea
          path: scraper
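      # Both repositories end up side by side under $GITHUB_WORKSPACE (site/ and
      # scraper/), which is how later steps reference each of them by path.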
      # The scraper uses Python 3.8+, so make sure we've got the latest 3.x
      - name: Set up Python 3.x
        uses: actions/setup-python@v1
        with:
          python-version: '3.x'
      - name: Cache Python Dependencies
        uses: actions/cache@v2
        with:
          path: ~/.cache/pip
          # NOTE: we can hash these requirements files together, but keeping them
          # separate allows us to load a partial cache if only the dev
          # requirements have changed.
          key: ${{ runner.os }}-pip-${{ hashFiles('scraper/requirements.txt') }}-${{ hashFiles('scraper/requirements-dev.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-${{ hashFiles('scraper/requirements.txt') }}-
            ${{ runner.os }}-pip-
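          # When the exact key misses, restore-keys are tried in order as prefix
          # matches, so an older cache can still be restored and then updated.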
      # Install system dependencies
      - name: Install libxml2-dev libxslt-dev
        run: sudo apt-get install -y libxml2-dev libxslt-dev
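      # (libxml2/libxslt headers are the usual build requirements for compiling
      # XML/HTML parsing packages such as lxml from source.)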
      # The commit that was checked out will be available as $SCRAPER_COMMIT.
      - name: Install Data Scraper & Dependencies
        run: |
          cd ${GITHUB_WORKSPACE}/scraper
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          # Keep track of the version used so we can use it in commit messages
          echo "SCRAPER_COMMIT=$(git rev-parse HEAD)" >> $GITHUB_ENV
      - name: Scrape Data
        id: scrape_data
        # The scraper may error on some counties but succeed on others. We want to
        # continue so that we still commit the data from partial successes. We'll
        # handle failures in a later step.
        continue-on-error: true
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
        run: |
          echo "SCRAPER_TIME=$(date)" >> $GITHUB_ENV
          cd ${GITHUB_WORKSPACE}/scraper
          OUT_PATH="${GITHUB_WORKSPACE}/site/data/data.v2.json"
          python scraper_data.py > today.json 2> errors.txt || true
          # Merge new data into previous data. (Note this has to be two steps;
          # if we read $OUT_PATH as input for jq and write stdout to it at the
          # same time, we get conflicts and bad output.)
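          # jq's `+` on two objects keeps the union of their keys; when a key exists
          # in both, the value from today.json (the second input) wins.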
          jq -s '.[0] + .[1]' $OUT_PATH today.json > merged.json
          mv merged.json $OUT_PATH
          ERRORS=$(cat errors.txt)
          if [ -n "${ERRORS}" ]; then
            echo "Encountered the following errors while scraping:"
            echo "------------------------------------------------"
            echo "${ERRORS}"
            # The error text can contain unescaped quotes, newlines, etc.
            # Use jq to make sure we are composing correctly formatted JSON.
            # `--raw-input` treats the input as strings instead of JSON.
            # `--slurp` causes all lines to be combined into one string.
            ERRORS_JSON=$(cat errors.txt | jq --slurp --raw-input '{"text": .}')
            curl -X POST -H 'Content-type: application/json' --data "${ERRORS_JSON}" $SLACK_WEBHOOK_URL
            # Raise an error so this step fails.
            false
          fi
      - name: Commit Changes
        run: |
          cd ${GITHUB_WORKSPACE}/site
          git config user.name "${{ secrets.githubaction_config_user_name }}"
          git config user.email "${{ secrets.githubaction_config_user_email }}"
          git add data/data.v2.json
          git commit -F - << EOF
          GitHubAction: data v2 update
          Created with commit ${{ env.SCRAPER_COMMIT }} from sfbrigade/data-covid19-sfbayarea
          https://github.com/sfbrigade/data-covid19-sfbayarea/commit/${{ env.SCRAPER_COMMIT }}
          EOF
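          # `git commit -F -` reads the commit message from stdin, i.e. the heredoc above.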
          git push
          echo 'Git commit and push completed for the daily data update.'
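      # Because of `continue-on-error: true`, a scraper failure does not stop the job,
      # but its outcome is still recorded on the step. Check it here so the partial
      # data above is committed first and the run is still marked as failed.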
      - name: Fail Workflow if the Scraper Had Errors
        if: ${{ steps.scrape_data.outcome == 'failure' }}
        run: |
          echo "Marking workflow as failed."
          false