# Skip to content
#
# Save raw data in parallel runs #42
#
# Save raw data in parallel runs
#
# Save raw data in parallel runs #42

# This workflow is designed to be manually triggered only.
# Each (script, data year) pair in the matrix runs as its own job: the job
# sources one script from data-raw/, uploads the resulting .rds and .json
# files as artifacts, and a final job merges the uploaded artifacts.
name: Save raw data in parallel runs
on:
  workflow_dispatch:  # allow manual trigger
jobs:
  generate-save-data:
    runs-on: ${{ matrix.config.os }}
    name: ${{ matrix.script-run }}, ${{ matrix.data-year }} (${{ matrix.config.os }}, ${{ matrix.config.r }})
    strategy:
      # Keep the remaining script/year combinations running when one fails.
      fail-fast: false
      matrix:
        config:
          - {os: ubuntu-latest, r: 'release'}
        script-run:  # all in data-raw/
          - BEAData.R
          - CensusData.R
          - EIAData.R
          - FAFData.R
          - USASpendingData.R
        data-year:
          - 2012
          - 2013
          - 2014
          - 2015
          - 2016
          - 2017
          - 2018
          - 2019
          - 2020
          - 2021
          - 2022
          - 2023
    # env:
    #   R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
    steps:
      - uses: actions/checkout@v4
      - uses: r-lib/actions/setup-pandoc@v2
      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ matrix.config.r }}
          # NOTE(review): matrix.config above defines only `os` and `r`, so
          # this expression has no value to read; confirm the action's
          # default user agent is what is wanted.
          http-user-agent: ${{ matrix.config.http-user-agent }}
          use-public-rspm: true
      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: devtools
      # Release disk space on runners
      - name: Release disk space on runners
        run: |
          sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET
          sudo rm -rf /opt/ghc
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
      # Generate data and save as .rds files
      # The script body is R code (see `shell` below): it loads the package,
      # sets `year` from the matrix, raises the download timeout, and sources
      # the selected data-raw script.
      - name: Generate data and save as .rds files (metadata saved as .json files)
        run: |
          devtools::load_all()
          year <- ${{ matrix.data-year }}
          options(timeout = 1000)
          source("data-raw/${{ matrix.script-run }}")
        shell: Rscript {0}
      # Upload .rds files
      - name: Upload .rds files and prepare zip for manual download
        uses: actions/upload-artifact@v4
        with:
          # Artifact name
          name: data - ${{ matrix.script-run }} - ${{ matrix.data-year }}
          # A file, directory or wildcard pattern that describes what to upload
          path: "data/*.rds"
          if-no-files-found: error  # 'warn' or 'ignore' are also available, defaults to `warn`
          # retention-days: 5 # cannot exceed the retention limit set by the repository, organization, or enterprise.
      # Upload .json files
      - name: Upload .json files and prepare zip for manual download
        uses: actions/upload-artifact@v4
        with:
          # Artifact name
          name: metadata - ${{ matrix.script-run }} - ${{ matrix.data-year }}
          # A file, directory or wildcard pattern that describes what to upload
          path: "inst/extdata/metadata/*.json"
          if-no-files-found: error  # 'warn' or 'ignore' are also available, defaults to `warn`
          # retention-days: 5 # cannot exceed the retention limit set by the repository, organization, or enterprise.
  merge:
    name: Combine data objects
    runs-on: ubuntu-latest
    needs: generate-save-data
    # Run even when some matrix jobs failed, but not when the run was cancelled.
    if: always() && !cancelled()
    steps:
      # Combine the artifacts uploaded by the matrix jobs into one artifact
      # named "raw data". No `pattern` is given, so the action's default
      # artifact selection applies.
      - name: Merge Artifacts
        uses: actions/upload-artifact/merge@v4
        with:
          name: raw data