From 1693d0db1c8f18bf904642b17a63d9418a77e47a Mon Sep 17 00:00:00 2001 From: Cameron Riddell Date: Tue, 6 Feb 2024 06:33:39 -0800 Subject: [PATCH 1/5] update REMARK markdown files for yaml restore markdown files --- REMARKs/Aiyagari-Idiosyncratic.yml | 3 +++ REMARKs/BayerLuetticke.yml | 4 ++++ REMARKs/BlanchardPA2019.bib | 9 --------- REMARKs/BlanchardPA2019.yml | 3 +++ REMARKs/BufferStock-LifeCycle.yml | 3 +++ REMARKs/BufferStockTheory.bib | 11 ----------- REMARKs/BufferStockTheory.yml | 3 +++ REMARKs/CGMPortfolio.yml | 3 +++ REMARKs/DistributionofWealthMPC.yml | 3 +++ REMARKs/DurableConsumerType.yml | 3 +++ REMARKs/EndogeneousRetirement.yml | 3 +++ REMARKs/EpiExp.yml | 3 +++ REMARKs/GanongNoelUI.yml | 3 +++ REMARKs/KrusellSmith.bib | 10 ---------- REMARKs/KrusellSmith.yml | 3 +++ REMARKs/LiqConstr.yml | 3 +++ REMARKs/Pandemic.md | 2 ++ REMARKs/Pandemic.yml | 3 +++ REMARKs/PortfolioChoiceBlogPost.yml | 3 +++ REMARKs/RiskyContrib.yml | 3 +++ REMARKs/Sequential Endogenous Grid Method.yml | 3 +++ REMARKs/SolvingMicroDSOPs.md | 2 +- REMARKs/SolvingMicroDSOPs.yml | 3 +++ REMARKs/cAndCwithStickyE.yml | 3 +++ REMARKs/ctDiscrete.yml | 3 +++ 25 files changed, 64 insertions(+), 31 deletions(-) create mode 100644 REMARKs/Aiyagari-Idiosyncratic.yml create mode 100644 REMARKs/BayerLuetticke.yml delete mode 100644 REMARKs/BlanchardPA2019.bib create mode 100644 REMARKs/BlanchardPA2019.yml create mode 100644 REMARKs/BufferStock-LifeCycle.yml delete mode 100644 REMARKs/BufferStockTheory.bib create mode 100644 REMARKs/BufferStockTheory.yml create mode 100644 REMARKs/CGMPortfolio.yml create mode 100644 REMARKs/DistributionofWealthMPC.yml create mode 100644 REMARKs/DurableConsumerType.yml create mode 100644 REMARKs/EndogeneousRetirement.yml create mode 100644 REMARKs/EpiExp.yml create mode 100644 REMARKs/GanongNoelUI.yml delete mode 100644 REMARKs/KrusellSmith.bib create mode 100644 REMARKs/KrusellSmith.yml create mode 100644 REMARKs/LiqConstr.yml create mode 100644 
REMARKs/Pandemic.yml create mode 100644 REMARKs/PortfolioChoiceBlogPost.yml create mode 100644 REMARKs/RiskyContrib.yml create mode 100644 REMARKs/Sequential Endogenous Grid Method.yml create mode 100644 REMARKs/SolvingMicroDSOPs.yml create mode 100644 REMARKs/cAndCwithStickyE.yml create mode 100644 REMARKs/ctDiscrete.yml diff --git a/REMARKs/Aiyagari-Idiosyncratic.yml b/REMARKs/Aiyagari-Idiosyncratic.yml new file mode 100644 index 00000000..384fae27 --- /dev/null +++ b/REMARKs/Aiyagari-Idiosyncratic.yml @@ -0,0 +1,3 @@ +name: Aiyagari-Idiosyncratic +remote: https://github.com/econ-ark/Aiyagari-Idiosyncratic +title: Uninsured Idiosyncratic Risk and Aggregate Saving diff --git a/REMARKs/BayerLuetticke.yml b/REMARKs/BayerLuetticke.yml new file mode 100644 index 00000000..64db9ac7 --- /dev/null +++ b/REMARKs/BayerLuetticke.yml @@ -0,0 +1,4 @@ +name: BayerLuetticke +remote: https://github.com/econ-ark/BayerLuetticke +title: Solving heterogeneous agent models in discrete time with many idiosyncratic + states by perturbation methods diff --git a/REMARKs/BlanchardPA2019.bib b/REMARKs/BlanchardPA2019.bib deleted file mode 100644 index 96c6bae0..00000000 --- a/REMARKs/BlanchardPA2019.bib +++ /dev/null @@ -1,9 +0,0 @@ -@article{blanchard2019public, - title={Public debt and low interest rates}, - author={Blanchard, Olivier}, - journal={American Economic Review}, - volume={109}, - number={4}, - pages={1197--1229}, - year={2019} -} \ No newline at end of file diff --git a/REMARKs/BlanchardPA2019.yml b/REMARKs/BlanchardPA2019.yml new file mode 100644 index 00000000..c670cf26 --- /dev/null +++ b/REMARKs/BlanchardPA2019.yml @@ -0,0 +1,3 @@ +name: BlanchardPA2019 +remote: https://github.com/econ-ark/BlanchardPA2019 +title: Public Debt and Low Interest Rates diff --git a/REMARKs/BufferStock-LifeCycle.yml b/REMARKs/BufferStock-LifeCycle.yml new file mode 100644 index 00000000..297d8bba --- /dev/null +++ b/REMARKs/BufferStock-LifeCycle.yml @@ -0,0 +1,3 @@ +name: BufferStock-LifeCycle 
+remote: https://github.com/econ-ark/BufferStock-LifeCycle +title: Buffer-Stock Saving and the Life Cycle/Permanent Income Hypothesis diff --git a/REMARKs/BufferStockTheory.bib b/REMARKs/BufferStockTheory.bib deleted file mode 100644 index cb16d4bf..00000000 --- a/REMARKs/BufferStockTheory.bib +++ /dev/null @@ -1,11 +0,0 @@ -@techreport{BufferStockTheory, - author = {Christopher D. Carroll}, - journal = {Manuscript, Department of Economics, Johns Hopkins University}, - note = {Available at \url{http://econ.jhu.edu/people/ccarroll/papers/BufferStockTheory}}, - title = {Theoretical Foundations of Buffer Stock Saving}, - year = {2019}, - url = {http://econ.jhu.edu/people/ccarroll/papers/BufferStockTheory.pdf}, - institution = {Department of Economics, Johns Hopkins University}, - type = {manuscript} -} - diff --git a/REMARKs/BufferStockTheory.yml b/REMARKs/BufferStockTheory.yml new file mode 100644 index 00000000..a92fe5d8 --- /dev/null +++ b/REMARKs/BufferStockTheory.yml @@ -0,0 +1,3 @@ +name: BufferStockTheory +remote: https://github.com/econ-ark/BufferStockTheory +title: BufferStockTheory diff --git a/REMARKs/CGMPortfolio.yml b/REMARKs/CGMPortfolio.yml new file mode 100644 index 00000000..ba1157dd --- /dev/null +++ b/REMARKs/CGMPortfolio.yml @@ -0,0 +1,3 @@ +name: CGMPortfolio +remote: https://github.com/econ-ark/CGMPortfolio +title: 'REMARK: Consumption and Portfolio Choice Over the Life Cycle' diff --git a/REMARKs/DistributionofWealthMPC.yml b/REMARKs/DistributionofWealthMPC.yml new file mode 100644 index 00000000..819660f5 --- /dev/null +++ b/REMARKs/DistributionofWealthMPC.yml @@ -0,0 +1,3 @@ +name: DistributionofWealthMPC +remote: https://github.com/econ-ark/DistributionOfWealthMPC +title: The distribution of wealth and the marginal propensity to consume diff --git a/REMARKs/DurableConsumerType.yml b/REMARKs/DurableConsumerType.yml new file mode 100644 index 00000000..7bb4a153 --- /dev/null +++ b/REMARKs/DurableConsumerType.yml @@ -0,0 +1,3 @@ +name: 
DurableConsumerType +remote: https://github.com/econ-ark/DurableConsumerType_REMARK +title: A Guide on Solving Non-convex Consumption-Saving Models diff --git a/REMARKs/EndogeneousRetirement.yml b/REMARKs/EndogeneousRetirement.yml new file mode 100644 index 00000000..7b1a1289 --- /dev/null +++ b/REMARKs/EndogeneousRetirement.yml @@ -0,0 +1,3 @@ +name: EndogeneousRetirement +remote: https://github.com/econ-ark/EndogenousRetirement +title: 'Endogenous Retirement: A Canonical Discrete-Continuous Problem' diff --git a/REMARKs/EpiExp.yml b/REMARKs/EpiExp.yml new file mode 100644 index 00000000..737d29be --- /dev/null +++ b/REMARKs/EpiExp.yml @@ -0,0 +1,3 @@ +name: EpiExp +remote: https://github.com/econ-ark/EpiExp +title: Epidemiological Expectations in Economics diff --git a/REMARKs/GanongNoelUI.yml b/REMARKs/GanongNoelUI.yml new file mode 100644 index 00000000..d778854f --- /dev/null +++ b/REMARKs/GanongNoelUI.yml @@ -0,0 +1,3 @@ +name: GanongNoelUI +remote: https://github.com/econ-ark/GanongNoelUI +title: 'Consumer Spending during Unemployment: Positive and Normative Implications' diff --git a/REMARKs/KrusellSmith.bib b/REMARKs/KrusellSmith.bib deleted file mode 100644 index 020a79b3..00000000 --- a/REMARKs/KrusellSmith.bib +++ /dev/null @@ -1,10 +0,0 @@ -@article{krusell1998income, - title={Income and wealth heterogeneity in the macroeconomy}, - author={Krusell, Per and Smith, Jr, Anthony A}, - journal={Journal of political Economy}, - volume={106}, - number={5}, - pages={867--896}, - year={1998}, - publisher={The University of Chicago Press} -} \ No newline at end of file diff --git a/REMARKs/KrusellSmith.yml b/REMARKs/KrusellSmith.yml new file mode 100644 index 00000000..be5212a9 --- /dev/null +++ b/REMARKs/KrusellSmith.yml @@ -0,0 +1,3 @@ +name: KrusellSmith +remote: https://github.com/econ-ark/KrusellSmith +title: Income and Wealth Heterogeneity in the Macroeconomy diff --git a/REMARKs/LiqConstr.yml b/REMARKs/LiqConstr.yml new file mode 100644 index 
00000000..429f90bf --- /dev/null +++ b/REMARKs/LiqConstr.yml @@ -0,0 +1,3 @@ +name: LiqConstr +remote: https://github.com/econ-ark/LiqConstr +title: Liquidity Constraints and Precautionary Saving diff --git a/REMARKs/Pandemic.md b/REMARKs/Pandemic.md index 922c2e9f..7aa48040 100644 --- a/REMARKs/Pandemic.md +++ b/REMARKs/Pandemic.md @@ -68,6 +68,8 @@ keywords: # optional # Pandemic-Consumption-Response +[![badge](https://img.shields.io/badge/Launch-Dashboard-579ACA.svg?logo=)](https://xhrtcvh6l53u.curvenote.dev/services/binder/v2/gh/econ-ark/Pandemic/HEAD?urlpath=/voila/render/Code/Python/dashboard.ipynb) + This repository is a complete software archive for the paper "Modeling the Consumption Response to the CARES Act" by Carroll, Crawley, Slacalek, and White (2020). This README file provides instructions for running our code on your own computer, as well as adjusting the parameters of the model to produce alternate versions of the figures in the paper. ## References diff --git a/REMARKs/Pandemic.yml b/REMARKs/Pandemic.yml new file mode 100644 index 00000000..d5a47230 --- /dev/null +++ b/REMARKs/Pandemic.yml @@ -0,0 +1,3 @@ +name: Pandemic +remote: https://github.com/econ-ark/Pandemic +title: Modeling the Consumption Response to the CARES Act diff --git a/REMARKs/PortfolioChoiceBlogPost.yml b/REMARKs/PortfolioChoiceBlogPost.yml new file mode 100644 index 00000000..a1598e5a --- /dev/null +++ b/REMARKs/PortfolioChoiceBlogPost.yml @@ -0,0 +1,3 @@ +name: PortfolioChoiceBlogPost +remote: https://github.com/econ-ark/PortfolioChoiceBlogPost +title: Optimal Financial Investment over the Life Cycle - Blog Post diff --git a/REMARKs/RiskyContrib.yml b/REMARKs/RiskyContrib.yml new file mode 100644 index 00000000..68155f70 --- /dev/null +++ b/REMARKs/RiskyContrib.yml @@ -0,0 +1,3 @@ +name: RiskyContrib +remote: https://github.com/econ-ark/RiskyContrib +title: A Two-Asset Savings Model with an Income-Contribution Scheme diff --git a/REMARKs/Sequential Endogenous Grid Method.yml 
b/REMARKs/Sequential Endogenous Grid Method.yml new file mode 100644 index 00000000..dcd72100 --- /dev/null +++ b/REMARKs/Sequential Endogenous Grid Method.yml @@ -0,0 +1,3 @@ +name: Sequential Endogenous Grid Method +remote: https://github.com/alanlujan91/SequentialEGM +title: 'EGM^n: The Sequential Endogenous Grid Method' diff --git a/REMARKs/SolvingMicroDSOPs.md b/REMARKs/SolvingMicroDSOPs.md index 517de179..9eab07fd 100644 --- a/REMARKs/SolvingMicroDSOPs.md +++ b/REMARKs/SolvingMicroDSOPs.md @@ -49,4 +49,4 @@ keywords: # optional # Solution Methods for Microeconomic Dynamic Stochastic Optimization Problems -These notes describe tools for solving microeconomic dynamic stochastic optimization problems, and show how to use those tools for efficiently estimating a standard life cycle consumption/saving model using microeconomic data. No attempt is made at a systematic overview of the many possible technical choices; instead, I present a specific set of methods that have proven useful in my own work (and explain why other popular methods, such as value function iteration, are a bad idea). Paired with these notes is Mathematica, Matlab, and Python software that solves the problems described in the text. +These notes describe tools for solving microeconomic dynamic stochastic optimization problems, and show how to use those tools for efficiently estimating a standard life cycle consumption/saving model using microeconomic data. No attempt is made at a systematic overview of the many possible technical choices; instead, I present a specific set of methods that have proven useful in my own work (and explain why other popular methods, such as value function iteration, are a bad idea). Paired with these notes is Mathematica, Matlab, and Python software that solves the problems described in the text. 
diff --git a/REMARKs/SolvingMicroDSOPs.yml b/REMARKs/SolvingMicroDSOPs.yml new file mode 100644 index 00000000..0bf4213f --- /dev/null +++ b/REMARKs/SolvingMicroDSOPs.yml @@ -0,0 +1,3 @@ +name: SolvingMicroDSOPs +remote: https://github.com/econ-ark/SolvingMicroDSOPs +title: Solution Methods for Microeconomic Dynamic Stochastic Optimization Problems diff --git a/REMARKs/cAndCwithStickyE.yml b/REMARKs/cAndCwithStickyE.yml new file mode 100644 index 00000000..00fd50fb --- /dev/null +++ b/REMARKs/cAndCwithStickyE.yml @@ -0,0 +1,3 @@ +name: cAndCwithStickyE +remote: https://github.com/econ-ark/cAndCwithStickyE +title: Sticky Expectations and Consumption Dynamics diff --git a/REMARKs/ctDiscrete.yml b/REMARKs/ctDiscrete.yml new file mode 100644 index 00000000..6152262a --- /dev/null +++ b/REMARKs/ctDiscrete.yml @@ -0,0 +1,3 @@ +name: ctDiscrete +remote: https://github.com/econ-ark/ctDiscrete +title: A Tractable Model of Buffer Stock Saving From 20efea4e6d8fdd2f630b06d5b503fbcbe6b719ae Mon Sep 17 00:00:00 2001 From: Cameron Riddell Date: Wed, 7 Feb 2024 10:13:58 -0800 Subject: [PATCH 2/5] Initial commit of REMARK CLI --- STANDARD.md | 21 +-- cli.py | 359 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 372 insertions(+), 8 deletions(-) create mode 100644 cli.py diff --git a/STANDARD.md b/STANDARD.md index 5b8a16b4..608c0f64 100644 --- a/STANDARD.md +++ b/STANDARD.md @@ -10,21 +10,26 @@ The PR should add a link to the repository to the Catalog (currently, in the REA ## The REMARK Standard +``` +. +|-- reproduce.sh +|-- CITATION.cff +`-- binder + `-- environment.yml +``` + The REMARK's repository must: 1. Have a [tagged release](https://docs.github.com/en/github/administering-a-repository/managing-releases-in-a-repository), the last commit before including it as a REMARK should be tagged with a 1.0 release. 2. 
In that repository at that release, there must be: - - In either the top-level directory or a `binder/` directory, either: - - installation files for `pip`: - - a `runtime.txt` containing the name of a python version, e.g. `python-3.9.0` - - a `requirements.txt` file with pinned dependencies (such as created by the command `pip freeze > requirements.txt`), or... - - installation files for conda: + - There must be a `binder/` directory containing an `environment.yml`: - an `environment.yml` file with pinned dependencies - A `reproduce.sh` script that - Installs the requirements - Runs and reproduces all the results -3. Include a valid CITATION.cff document with bibliographic -metadata for the repository. - +3. Include a valid CITATION.cff document with bibliographic metadata for the repository. +4. Subset of results that you are claiming to be reproducible. + - This could be specified as a directory? + It is **strongly recommended** to include: - If reproduce.sh takes longer than a few minutes, a `reproduce_min.sh` that generates some interesting subset of results within a few minutes - A Jupyter notebook that exposits the material being reproduced. 
diff --git a/cli.py b/cli.py
new file mode 100644
index 00000000..ace6983e
--- /dev/null
+++ b/cli.py
@@ -0,0 +1,354 @@
+#!/usr/bin/env python
+"""Maintainer CLI for REMARKs: pull, lint, build, execute, summarize logs, clean."""
+
+from csv import DictWriter, DictReader
+from collections import defaultdict
+from datetime import datetime, timezone
+from itertools import tee, islice
+from json import loads
+from os import getenv
+import re
+
+from yaml import safe_load
+from yaml.scanner import ScannerError
+
+from argparse import ArgumentParser
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from dataclasses import dataclass, asdict
+from io import StringIO
+from os import environ
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from subprocess import run, PIPE, STDOUT
+from urllib.parse import urlsplit
+
+@dataclass
+class Metadata:
+    """Catalog entry for one REMARK: remote URL, local clone path and YAML data."""
+    remote: str
+    local: Path
+    yaml: dict
+    image_name: str = None
+
+    def __post_init__(self):
+        # Always derived from the clone directory name (lower-cased, spaces -> '_').
+        self.image_name = f'econ-ark/{self.local.name.lower().replace(" ", "_")}'
+
+    def flat(self):
+        """Return the YAML entries merged with the dataclass fields, minus `yaml`."""
+        d = {**self.yaml, **asdict(self)}
+        del d['yaml']
+        return d
+
+
+def parse_paths_from_standard(text):
+    """Yield the leaf paths of a `tree`-style listing as relative `Path`s.
+
+    The first line (the '.' root) is skipped. Nesting depth is the number of
+    4-character indent units ('    ' or '|   ') that precede an entry's '--'.
+    An entry followed by a deeper entry is a directory and is not yielded
+    itself; it only contributes to the paths of the entries beneath it.
+    """
+    entries = []
+    for line in text.splitlines()[1:]:
+        prefix, marker, name = line.partition('--')
+        if not marker:
+            continue  # not a tree entry (e.g. a blank line)
+        depth = len(re.findall(r'\s{4}|\|\s{3}', prefix))
+        entries.append((depth, name.strip()))
+
+    parents = []
+    for i, (depth, name) in enumerate(entries):
+        # Drop directories we have left; this handles dedents of any size.
+        del parents[depth:]
+        if i + 1 < len(entries) and entries[i + 1][0] > depth:
+            parents.append(name)
+        else:
+            yield Path(*parents, name)
+
+def git_exists(local_repo_path):
+    """Return True when `local_repo_path` contains a `.git` entry."""
+    return Path(local_repo_path).joinpath('.git').exists()
+
+def git_clone(local_repo_path, *, remote):
+    """Shallow-clone `remote` into `local_repo_path`; returns the CompletedProcess."""
+    return run(
+        ['git', 'clone', '--depth', '1', '--single-branch', remote, local_repo_path]
+    )
+
+def git_pull(local_repo_path, *, remote_name=None):
+    """Run `git pull` inside the clone (`remote_name` is currently unused)."""
+    return run(['git', 'pull'], cwd=local_repo_path)
+
+def git_update_remotes(local_repo_path, *, remote_dict):
+    """Set each remote's URL, adding the remote when `set-url` fails."""
+    returns = {}
+    for name, url in remote_dict.items():
+        returns[name] = run(['git', 'remote', 'set-url', name, url], cwd=local_repo_path)
+        if returns[name].returncode != 0:
+            returns[name] = run(['git', 'remote', 'add', name, url], cwd=local_repo_path)
+    return returns
+
+def build_docker(local_repo, image_name):
+    """Build (without running) a repo2docker image; output is captured as text."""
+    cmd = ['repo2docker', '--no-run', '--image-name', image_name, local_repo.resolve()]
+    return run(cmd, stdout=PIPE, stderr=STDOUT, encoding='utf-8')
+
+def execute_docker(local_repo, image_name):
+    """Run `reproduce.sh` in a container of `image_name`; output is captured as text."""
+    # repo2docker names the Python execution conda environment: "kernel" | "notebook"
+    # kernel is used if the notebook env has incompat libraries or Python version
+    # notebook should be used in other cases.
+    # No `-it`: output is captured through pipes, and docker refuses `-t` when
+    # stdin is not a terminal (cron, CI, redirected input).
+    docker_prefix = [
+        'docker', 'run', '--entrypoint', '',
+        '--mount', f'type=bind,source={local_repo.resolve()},target={getenv("HOME")}',
+        image_name,
+    ]
+
+    # stderr is captured separately so warnings cannot corrupt the JSON on stdout.
+    envs_list_proc = run(
+        [*docker_prefix, 'conda', 'env', 'list', '--json'],
+        stdout=PIPE, stderr=PIPE, encoding='utf-8'
+    )
+    try:
+        envs = loads(envs_list_proc.stdout)['envs']
+    except (ValueError, KeyError):
+        # No usable listing (e.g. no conda in the image): run without `conda run`;
+        # the reproduce.sh call below still reports any real docker failure.
+        envs = []
+
+    priority = ['/srv/conda/envs/kernel', '/srv/conda/envs/notebook']
+    for prefix in priority:
+        if prefix in envs:
+            cmd_prefix = ['conda', 'run', '-p', prefix]
+            break
+    else:
+        cmd_prefix = []
+
+    return run(
+        [*docker_prefix, *cmd_prefix, 'bash', './reproduce.sh'],
+        stdout=PIPE, stderr=STDOUT, encoding='utf-8'
+    )
+
+def clean_docker(image_name):
+    """Force-remove the docker image `image_name`."""
+    cmd = ['docker', 'rmi', '--force', image_name]
+    return run(cmd, encoding='utf-8')
+
+def build_conda(local_repo):
+    """Create/update `./condaenv` in the clone from `binder/environment.yml`."""
+    cmd = ['conda', 'env', 'update', '-f', 'binder/environment.yml', '--prefix', './condaenv']
+    proc = run(cmd, stdout=PIPE, stderr=STDOUT, encoding='utf-8', cwd=local_repo)
+    if proc.returncode == 0:
+        # A '*' .gitignore inside the env keeps git from tracking its files.
+        with open(local_repo / 'condaenv' / '.gitignore', 'w') as f:
+            f.write('*')
+    return proc
+
+def execute_conda(local_repo):
+    """Run `reproduce.sh` with $SHELL (default /bin/bash) inside `./condaenv`."""
+    cmd = ['conda', 'run', '-p', './condaenv', getenv('SHELL', default='/bin/bash'), 'reproduce.sh']
+    return run(cmd, stdout=PIPE, stderr=STDOUT, encoding='utf-8', cwd=local_repo)
+
+def clean_conda(local_repo):
+    """Remove the `./condaenv` environment from the clone."""
+    cmd = ['conda', 'env', 'remove', '--prefix', './condaenv', '--yes', '--quiet']
+    return run(cmd, encoding='utf-8', cwd=local_repo)
+
+def select_remarks(parser, args, metadata):
+    """Return the REMARK names chosen on the command line (all of them for --all).
+
+    Unknown names end the program with a usage error instead of a KeyError.
+    """
+    if args.all:
+        return list(metadata)
+    unknown = [name for name in args.remark if name not in metadata]
+    if unknown:
+        parser.error(f'unknown REMARK(s): {", ".join(unknown)}')
+    return args.remark
+
+def run_jobs(label, func, names, metadata, *, kind, jobs, report_dir):
+    """Run `func(mdata)` for each named REMARK in a thread pool and log the results.
+
+    `func` must return a CompletedProcess with text `stdout`. Its output goes to
+    `<report_dir>/<name>_<kind>.log` and its return code to
+    `<report_dir>/<name>_<kind>_rc.log`.
+    """
+    report_dir.mkdir(exist_ok=True, parents=True)
+
+    def job(name):
+        print(f'{label} {name}')
+        return func(metadata[name])
+
+    # ThreadPoolExecutor rejects max_workers < 1 (e.g. `--jobs 0`).
+    with ThreadPoolExecutor(max(1, min(len(names), jobs))) as pool:
+        futures = {pool.submit(job, name): metadata[name] for name in names}
+
+        for comp in as_completed(futures):
+            mdata = futures[comp]
+            proc = comp.result()
+
+            remark_name = mdata.yaml['name']
+            report_log_path = report_dir / f'{remark_name}_{kind}.log'
+            report_rc_path = report_dir / f'{remark_name}_{kind}_rc.log'
+
+            with open(report_log_path, 'w') as f:
+                f.write(proc.stdout)
+            with open(report_rc_path, 'w') as f:
+                f.write(str(proc.returncode))
+            print(f'{remark_name} → {proc.returncode}')
+            if jobs == 1:
+                print(proc.stdout)
+
+if __name__ == '__main__':
+    git_root = Path(__file__).parent
+    remark_home = git_root / '_REMARK'
+    repo_home = remark_home / 'repos'
+    repo_home.mkdir(exist_ok=True, parents=True)
+
+
+    with open(remark_home / '.gitignore', 'w') as f:
+        f.write('**')
+
+    metadata = {}
+    for p in git_root.joinpath('REMARKs').glob('*.yml'):
+        with open(p) as f:
+            data = safe_load(f)
+        metadata[p.stem] = Metadata(
+            local=repo_home / data['name'],
+            remote=data['remote'],
+            yaml=data,
+        )
+
+    parser = ArgumentParser()
+    subparsers = parser.add_subparsers(dest='action')
+
+    # pull/fetch
+    pull_parser = subparsers.add_parser('pull')
+    pull_group = pull_parser.add_mutually_exclusive_group(required=True)
+    pull_group.add_argument('remark', default=[], nargs='*')
+    pull_group.add_argument('--all', action='store_true')
+
+
+    # lint
+    lint_parser = subparsers.add_parser('lint')
+    lint_group = lint_parser.add_mutually_exclusive_group(required=True)
+    lint_group.add_argument('remark', nargs='*', default=[])
+    lint_group.add_argument('--all', action='store_true')
+
+
+    # build
+    build_parser = subparsers.add_parser('build')
+    build_parser.add_argument('type', choices=['docker', 'conda'])
+    build_parser.add_argument('--dry-run', action='store_true')
+    build_parser.add_argument('--jobs', '-J', default=4, type=int)
+
+    build_group = build_parser.add_mutually_exclusive_group(required=True)
+    build_group.add_argument('remark', default=[], nargs='*')
+    build_group.add_argument('--all', action='store_true')
+
+
+    # execute
+    execute_parser = subparsers.add_parser('execute')
+    execute_parser.add_argument('type', choices=['docker', 'conda'])
+    execute_parser.add_argument('--jobs', '-J', default=4, type=int)
+
+    execute_group = execute_parser.add_mutually_exclusive_group(required=True)
+    execute_group.add_argument('remark', default=[], nargs='*')
+    execute_group.add_argument('--all', action='store_true')
+
+    # log
+    log_parser = subparsers.add_parser('logs')
+
+    # clean
+    clean_parser = subparsers.add_parser('clean')
+    clean_parser.add_argument('type', choices=['docker', 'conda'])
+
+    clean_group = clean_parser.add_mutually_exclusive_group(required=True)
+    clean_group.add_argument('remark', default=[], nargs='*')
+    clean_group.add_argument('--all', action='store_true')
+
+    args = parser.parse_args()
+
+    if args.action == 'pull':
+        for name in select_remarks(parser, args, metadata):
+            mdata = metadata[name]
+            print(f'Updating {name} @ {mdata.local}')
+            if git_exists(mdata.local):
+                git_pull(mdata.local)
+            else:
+                git_clone(mdata.local, remote=mdata.remote)
+            print('-' * 20, end='\n\n')
+
+    elif args.action == 'lint':
+        with open(git_root / 'STANDARD.md') as f:
+            # Lazy `.*?`: take the first fenced block after the heading, not the last.
+            tree = re.search(
+                r'## the remark standard.*?```(.*?)```',
+                f.read(),
+                flags=re.I | re.DOTALL
+            )
+        if tree is None:
+            parser.error('STANDARD.md has no fenced file tree under "## The REMARK Standard"')
+        requirements = [*parse_paths_from_standard(tree.group(1).strip())]
+        for remark in select_remarks(parser, args, metadata):
+            mdata = metadata[remark]
+            messages = []
+
+            for req in requirements:
+                if not mdata.local.joinpath(req).exists():
+                    messages.append(f'missing {req}')
+
+            if messages:
+                print(
+                    f' {remark} '.center(50, '-'),
+                    mdata.local,
+                    *(f'- {m}' for m in messages),
+                    sep='\n',
+                    end='\n'*2,
+                )
+
+    elif args.action == 'build':
+        builders = {
+            'docker': lambda m: build_docker(m.local, m.image_name),
+            'conda': lambda m: build_conda(m.local),
+        }
+        run_jobs(
+            'Building', builders[args.type], select_remarks(parser, args, metadata), metadata,
+            kind=args.type, jobs=args.jobs, report_dir=remark_home / 'logs' / 'build',
+        )
+
+    elif args.action == 'execute':
+        executors = {
+            'docker': lambda m: execute_docker(m.local, m.image_name),
+            'conda': lambda m: execute_conda(m.local),
+        }
+        run_jobs(
+            'Executing', executors[args.type], select_remarks(parser, args, metadata), metadata,
+            kind=args.type, jobs=args.jobs, report_dir=remark_home / 'logs' / 'execute',
+        )
+
+    elif args.action == 'logs':
+        known = {m.yaml['name'] for m in metadata.values()}
+        results = defaultdict(lambda: defaultdict(dict))
+        for log_file in sorted(remark_home.joinpath('logs').glob('*/*_rc.log')):
+            name, log_type, _ = log_file.name.rsplit('_', maxsplit=2)
+            if name in known:
+                results[log_file.parent.name][name][log_type] = log_file.read_text()
+
+        for log_type, logs in results.items():
+            padding = max(len(k) for k in logs)
+            print(f'{log_type:-^{padding}}')
+            for name, rc in logs.items():
+                print(f'{name: <{padding}} = {rc}')
+
+    elif args.action == 'clean':
+        for name in select_remarks(parser, args, metadata):
+            mdata = metadata[name]
+            if args.type == 'docker':
+                clean_docker(mdata.image_name)
+            elif args.type == 'conda':
+                clean_conda(mdata.local)
+
From 8bcfc5562f1df564257cd3aa55bdd0e18a424530 Mon Sep 17 00:00:00 2001
From: Cameron Riddell
Date: Thu, 28 Mar 2024 06:57:31 -0700
Subject: [PATCH 3/5] add beyond streetlight yaml

---
 REMARKs/beyond-the-streetlight.yml | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 REMARKs/beyond-the-streetlight.yml

diff --git a/REMARKs/beyond-the-streetlight.yml b/REMARKs/beyond-the-streetlight.yml
new file mode 100644
index 00000000..51d72e9e
--- /dev/null
+++ b/REMARKs/beyond-the-streetlight.yml
@@ -0,0 +1,3 @@
+name: beyond-the-streetlight
+remote: https://github.com/dedwar65/beyond-the-streetlight
+title: Beyond the streetlight

From 4ef706026253b7038bf34257ec6121a8270dae5a Mon Sep 17 00:00:00 2001
From: Cameron Riddell
Date: Wed, 3 Apr 2024 09:10:08 -0700
Subject: [PATCH 4/5] remove dry-run option from building

---
 cli.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cli.py b/cli.py
index ace6983e..dd30adc7 100644
--- a/cli.py
+++ b/cli.py
@@ -242,7 +242,6 @@
     # build
     build_parser = subparsers.add_parser('build')
     build_parser.add_argument('type', choices=['docker', 'conda'])
-    build_parser.add_argument('--dry-run', action='store_true')
     build_parser.add_argument('--jobs', '-J', default=4, type=int)
 
     build_group = build_parser.add_mutually_exclusive_group(required=True)

From 70839b0a02237bc0fbefdfe159fe4f474e71d755 Mon Sep 17 00:00:00 2001
From: Cameron Riddell
Date: Wed, 3 Apr 2024 09:13:16 -0700
Subject: [PATCH 5/5] add help for cli.py

---
 README.md        | 89 ++++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  1 +
 2 files changed, 90 insertions(+)
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
index 72b4e3b4..08cf40be 100644
--- a/README.md
+++ b/README.md
@@ -58,3 +58,92 @@
Jupyter notebook(s) should: ## Differences with DemARK The key difference with the contents of the [DemARK](https://github.com/econ-ark/DemARK) repo is that REMARKs are allowed to rely on the existence of local files and subdirectories (figures; data) at a predictable filepath relative to the location of the root. + +## For Maintainers + +**Command Line Interface** `cli.py` + +`cli.py` is an automated tool that facilitates: +- cloning of REMARK repositories +- linting (detection of missing files from a given REMARK) +- building conda environments/docker images + - uses `conda`/`repo2docker` under the hood +- executing `reproduce.sh` scripts within the built environments. + +All artifacts generated by `cli.py` are stored in a newly created `_REMARK` folder. + +1. Once you clone a REMARK you'll be able to find its contents inside of `_REMARK/repos/…` +2. Once you build/execute a REMARK you'll be able to find a corresponding log +file from that process inside of `_REMARK/logs/…` + +`cli.py` has built-in parallelization specified by the `-J` flag for the build and execute actions. + +### Requirements + +- python 3.9 or newer. +- the packages listed in `requirements.txt` + +### Action + +**Clone/Pull** + +Pulling REMARKs (these are cloned into the `_REMARK/repos` folder) + +```bash +python cli.py pull --all # git clone all REMARKs +python cli.py pull {remark_name} # git clone one or more REMARK(s) +``` + +**Lint** + +Shows what files are missing from given REMARK(s). The linter uses the +file-tree printout from STANDARD.md and compares it to the files found +in the currently cloned REMARK(s). + +```bash +python cli.py lint --all # detect missing files from all REMARKs +python cli.py lint {remark_name} # detect missing files from one or more REMARK(s) +``` + +**Build** + +Building conda environments and/or docker images. 
+ +```bash +python cli.py build conda --all # build conda environments for all REMARKs (stored as a `condaenv` folder inside the cloned REMARK repo) +python cli.py build docker --all # build docker images for all REMARKs (tagged `econ-ark/<lower-cased remark name>`) +python cli.py build conda {remark_name} # build conda environments for one or more REMARK(s) +python cli.py build docker {remark_name} # build docker image(s) for one or more REMARK(s) +``` + +The primary difference between `conda` and `docker` for builds is that `docker` will be more flexible for multilanguage REMARKs. It leverages +repo2docker (the same tool that mybinder uses) to create docker images from repositories. + +**Execute** + +Automated execution within built conda environments/docker containers. + +```bash +python cli.py execute conda --all # execute reproduce.sh via conda for all REMARKs +python cli.py execute docker --all # execute reproduce.sh via docker for all REMARKs +python cli.py execute conda {remark_name} # execute reproduce.sh via conda for one or more REMARK(s) +python cli.py execute docker {remark_name} # execute reproduce.sh via docker for one or more REMARK(s) +``` + +*Both the build and execute subcommands have an optional --jobs argument to +specify the number of jobs to run in parallel when building/executing.* + +**Logs/Summarize** + +```bash +python cli.py logs # view the return codes recorded by the most recent build/execute runs +``` + +**Clean/Remove** + +```bash +python cli.py clean conda --all # remove all built conda environments +python cli.py clean docker --all # remove all built docker images +python cli.py clean conda {remark_name} # remove conda environment(s) from specified REMARK(s) +python cli.py clean docker {remark_name} # remove docker images built from specified REMARK(s) +``` diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..ee251833 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +pyyaml==6.0.0