Improve CI Run-Time: Test Smarter (#625)
* test smarter

* trigger plots tests on custom ci command

* rename to plots

* move file

* ritest and multcomp to slow tests

* extended tests

* rename files

* rename files

* fix typo
s3alfisc authored Sep 18, 2024
1 parent c4466b5 commit 442cb6d
Showing 8 changed files with 102 additions and 13 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/ci-tests.yaml
@@ -45,20 +45,20 @@ jobs:

      - name: Run tests
        run: |
-          poetry run pytest tests -m "not (extended or slow)" --cov=pyfixest --cov-report=xml
+          poetry run pytest tests -m "not (extended or slow or plots)" --cov=pyfixest --cov-report=xml
      - name: Upload to Codecov
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}

  test_slow:
-    name: "Test Slow"
+    name: "Tests vs fixest"
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
-        python-version: ["3.12"]
+        python-version: ["3.9", "3.12"]
        pytest_opts: ["--workers 4 --tests-per-worker 1"]

    steps:
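For context, pytest's -m option filters collected tests by marker expression, so the default CI job now skips everything tagged extended, slow, or plots, and only the fast suite gates every push. A minimal sketch of how such markers drive selection (test names here are hypothetical):

import pytest


@pytest.mark.slow
def test_against_r_fixest():
    # Deselected by -m "not (extended or slow or plots)";
    # only the dedicated slow-test job picks this up.
    ...


def test_fast_path():
    # Unmarked, so it stays in the default CI run.
    ...

Running pytest tests -m "not (extended or slow or plots)" collects only test_fast_path here.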
54 changes: 54 additions & 0 deletions .github/workflows/extended-tests.yml
@@ -0,0 +1,54 @@
name: Tests Extended

# Trigger on two events: push to master or labeled PR
on:
  # Trigger on push to master
  push:
    branches:
      - master

  # Trigger when a label is added to a PR
  pull_request:
    types: [labeled]

jobs:
  run-workflow:
    runs-on: ubuntu-latest

    # Matrix strategy for Python versions 3.9 and 3.12
    strategy:
      matrix:
        python-version: [3.9, 3.12]

    steps:
      # Checkout source code (common to both scenarios)
      - name: Checkout source
        uses: actions/checkout@v4

      # Setup python (common to both scenarios)
      - name: Setup python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # Set up poetry (common to both scenarios)
      - name: Set up poetry
        uses: snok/install-poetry@v1

      # Install dependencies (common to both scenarios)
      - name: Install dependencies
        run: poetry install --without docs

      # Set numba parallel flags (common to both scenarios)
      - name: Set numba parallel flags
        run: echo "NUMBA_NUM_THREADS=1" >> $GITHUB_ENV

      # Run plot tests on PRs carrying the 'tests-extended' label
      - name: Run tests for plots (only on PRs with the 'tests-extended' label)
        if: github.event_name == 'pull_request' && contains(github.event.label.name, 'tests-extended')
        run: poetry run pytest tests -m "plots" --cov=pyfixest --cov-report=xml

      # Run tests for push to master
      - name: Run tests for push to master
        if: github.event_name == 'push' && github.ref == 'refs/heads/master'
        run: poetry run pytest tests -m "plots" --cov=pyfixest --cov-report=xml
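A usage note on the trigger: pull_request with types [labeled] fires only at the moment a label is applied, so the plot suite starts when the tests-extended label is added to a PR (for example via gh pr edit <number> --add-label tests-extended, assuming the GitHub CLI is available) and does not re-run on subsequent pushes unless the label is removed and re-applied.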
3 changes: 3 additions & 0 deletions justfile
@@ -31,6 +31,9 @@ update-tests-data:
tests:
    poetry run pytest -n 4 --cov-report=term tests

+tests-plots:
+    poetry run pytest -m "plots" -n 4 --cov-report=term tests

rerun-tests:
    poetry run pytest --lf -n 4

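The new recipe mirrors the CI command so the plot suite can be exercised locally with just tests-plots; -n 4 is pytest-xdist's flag for spreading tests across four worker processes, and -m "plots" restricts collection to the newly marked plotting tests.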
1 change: 1 addition & 0 deletions pyproject.toml
@@ -60,6 +60,7 @@ addopts = [
markers = [
    "slow: mark test to be part of the slow test suite",
    "extended: mark test to be part of the extended test suite",
+    "plots: marks all tests for plotting functionality; run only on push to master or on PRs labeled 'tests-extended'",
]
filterwarnings = [
    "ignore::FutureWarning:rpy2",
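Registering the marker here matters because pytest flags unknown marks with a PytestUnknownMarkWarning, and turns them into collection errors when --strict-markers is set. A minimal sketch of the failure mode (the misspelled marker is deliberate and hypothetical):

import pytest


@pytest.mark.plots  # registered in pyproject.toml, collects cleanly
def test_registered_marker():
    ...


@pytest.mark.plotz  # typo: warns by default, errors under --strict-markers
def test_unregistered_marker():
    ...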
5 changes: 5 additions & 0 deletions tests/test_multcomp.py
@@ -17,6 +17,7 @@
broom = importr("broom")


+@pytest.mark.extended
def test_bonferroni():
    data = get_data().dropna()
    rng = np.random.default_rng(989)
@@ -51,6 +52,7 @@ def test_bonferroni():
    ), "bonferroni failed"


+@pytest.mark.extended
@pytest.mark.parametrize("seed", [293, 912, 831])
@pytest.mark.parametrize("sd", [0.5, 1.0, 1.5])
def test_wildrwolf_hc(seed, sd):
@@ -90,6 +92,7 @@ def test_wildrwolf_hc(seed, sd):
    )


+@pytest.mark.extended
@pytest.mark.parametrize("seed", [29090381, 32, 99932444])
@pytest.mark.parametrize("sd", [0.5, 1.0, 1.5])
def test_wildrwolf_crv(seed, sd):
@@ -131,6 +134,7 @@ def test_wildrwolf_crv(seed, sd):
    )


+@pytest.mark.extended
def test_stepwise_function():
    B = 1000
    S = 5
@@ -148,6 +152,7 @@ def test_stepwise_function():
# Import data from pyfixest


+@pytest.mark.extended
@pytest.mark.parametrize("seed", [1000, 2000, 3000])
@pytest.mark.parametrize("reps", [999, 1999])
def test_sampling_scheme(seed, reps):
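As an aside on cost: stacked @pytest.mark.parametrize decorators multiply out, so three seeds times three standard deviations yields nine cases per test function, which is exactly why moving these R-comparison tests behind the extended marker pays off. A minimal sketch:

import pytest


@pytest.mark.parametrize("seed", [293, 912, 831])
@pytest.mark.parametrize("sd", [0.5, 1.0, 1.5])
def test_cartesian_product(seed, sd):
    # pytest collects 3 x 3 = 9 parameter combinations for this test.
    assert sd > 0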
10 changes: 10 additions & 0 deletions tests/test_plots.py
@@ -40,6 +40,7 @@ def fit_multi(data):
    return feols(fml="Y + Y2 ~ i(f2, X1)", data=data)


+@pytest.mark.extended
@pytest.mark.parametrize(
    argnames="figsize",
    argvalues=[(10, 6), None],
@@ -61,11 +62,13 @@ def test_set_figsize(figsize, plot_backend):
    assert figsize_not_none == figsize


+@pytest.mark.extended
def test_set_figsize_not_none_bad_backend():
    figsize_not_none = set_figsize((10, 6), "bad_backend")
    assert figsize_not_none == (10, 6)


+@pytest.mark.extended
def test_set_figsize_none_bad_backend():
    with pytest.raises(
        ValueError, match="plot_backend must be either 'lets_plot' or 'matplotlib'."
@@ -101,6 +104,7 @@ def test_set_figsize_none_bad_backend():
    argvalues=[None, {"f2": "F2", "X1": "1x"}],
    ids=["no_labels", "labels"],
)
+@pytest.mark.extended
def test_iplot(
    fit1,
    fit2,
@@ -137,6 +141,7 @@ def test_iplot(
    iplot([fit1, fit2], **plot_kwargs)


+@pytest.mark.extended
def test_iplot_error(data):
    with pytest.raises(ValueError):
        fit4 = feols(fml="Y ~ X1", data=data, vcov="iid")
@@ -175,6 +180,7 @@ def test_iplot_error(data):
    argvalues=[None, {"f2": "F2", "X1": "1x"}],
    ids=["no_labels", "labels"],
)
+@pytest.mark.extended
def test_coefplot(
    fit1,
    fit2,
@@ -212,27 +218,31 @@ def test_coefplot(
    fit_multi.coefplot(**plot_kwargs)


+@pytest.mark.extended
@patch("pyfixest.report.visualize._coefplot_matplotlib")
def test_coefplot_default_figsize_matplotlib(_coefplot_matplotlib_mock, fit1, data):
    coefplot(fit1, plot_backend="matplotlib")
    _, kwargs = _coefplot_matplotlib_mock.call_args
    assert kwargs.get("figsize") == (10, 6)


+@pytest.mark.extended
@patch("pyfixest.report.visualize._coefplot_matplotlib")
def test_coefplot_non_default_figsize_matplotlib(_coefplot_matplotlib_mock, fit1, data):
    coefplot(fit1, figsize=(12, 7), plot_backend="matplotlib")
    _, kwargs = _coefplot_matplotlib_mock.call_args
    assert kwargs.get("figsize") == (12, 7)


+@pytest.mark.extended
@patch("pyfixest.report.visualize._coefplot_lets_plot")
def test_coefplot_default_figsize_lets_plot(_coefplot_lets_plot_mock, fit1, data):
    coefplot(fit1, plot_backend="lets_plot")
    _, kwargs = _coefplot_lets_plot_mock.call_args
    assert kwargs.get("figsize") == (500, 300)


+@pytest.mark.extended
@patch("pyfixest.report.visualize._coefplot_lets_plot")
def test_coefplot_non_default_figsize_lets_plot(_coefplot_lets_plot_mock, fit1, data):
    coefplot(fit1, figsize=(600, 400), plot_backend="lets_plot")
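For readers unfamiliar with the pattern in these figsize tests: @patch replaces the backend-specific plotting function with a mock so nothing is rendered, and mock.call_args unpacks into the positional and keyword arguments of the last call, which is what the assertions on figsize inspect. A self-contained sketch under those assumptions (draw_backend and coefplot_like are hypothetical stand-ins, not pyfixest functions):

from unittest.mock import patch


def draw_backend(figsize):
    # Hypothetical stand-in for a real backend like _coefplot_matplotlib.
    raise RuntimeError("should never execute inside the test")


def coefplot_like(figsize=(10, 6)):
    draw_backend(figsize=figsize)


@patch(f"{__name__}.draw_backend")
def test_default_figsize(draw_mock):
    coefplot_like()
    _, kwargs = draw_mock.call_args  # (positional args, keyword args)
    assert kwargs.get("figsize") == (10, 6)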
5 changes: 5 additions & 0 deletions tests/test_ritest.py
@@ -8,6 +8,7 @@
matplotlib.use("Agg") # Use a non-interactive backend


+@pytest.mark.extended
@pytest.mark.parametrize("fml", ["Y~X1+f3", "Y~X1+f3|f1", "Y~X1+f3|f1+f2"])
@pytest.mark.parametrize("resampvar", ["X1", "f3"])
@pytest.mark.parametrize("reps", [111, 212])
@@ -45,6 +46,7 @@ def test_algos_internally(data, fml, resampvar, reps, cluster):
    assert np.allclose(ritest_stats1, ritest_stats2, atol=1e-8, rtol=1e-8)


+@pytest.mark.extended
@pytest.mark.parametrize("fml", ["Y~X1+f3", "Y~X1+f3|f1", "Y~X1+f3|f1+f2"])
@pytest.mark.parametrize("resampvar", ["X1", "f3"])
@pytest.mark.parametrize("reps", [1000])
@@ -95,6 +97,7 @@ def data():
    return pf.get_data(N=1000, seed=2999)


+@pytest.mark.extended
@pytest.mark.parametrize("fml", ["Y~X1+f3", "Y~X1+f3|f1", "Y~X1+f3|f1+f2"])
@pytest.mark.parametrize("resampvar", ["X1", "f3", "X1=-0.75", "f3>0.05"])
@pytest.mark.parametrize("cluster", [None, "group_id"])
@@ -144,6 +147,7 @@ def test_vs_r(data, fml, resampvar, cluster, ritest_results):
    assert np.allclose(res1["2.5% (Pr(>|t|))"], ci_lower, rtol=0.005, atol=0.005)


+@pytest.mark.extended
def test_fepois_ritest():
    data = pf.get_data(model="Fepois")
    fit = pf.fepois("Y ~ X1*f3", data=data)
@@ -158,6 +162,7 @@ def data_r_vs_t():
    return pf.get_data(N=5000, seed=2999)


+@pytest.mark.extended
@pytest.mark.parametrize("fml", ["Y~X1+f3", "Y~X1+f3|f1", "Y~X1+f3|f1+f2"])
@pytest.mark.parametrize("resampvar", ["X1", "f3"])
@pytest.mark.parametrize("cluster", [None, "group_id"])
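A note on the tolerances used against R's ritest: np.allclose(a, b, rtol=r, atol=t) checks |a - b| <= t + r * |b| elementwise, combining a small absolute floor with a relative band. A worked sketch:

import numpy as np

a = np.array([0.5000, 0.1000])
b = np.array([0.5020, 0.1004])

# Elementwise check: |a - b| <= atol + rtol * |b|
# 0.0020 <= 0.005 + 0.005 * 0.5020  -> True
# 0.0004 <= 0.005 + 0.005 * 0.1004  -> True
print(np.allclose(a, b, rtol=0.005, atol=0.005))  # True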
31 changes: 21 additions & 10 deletions tests/test_vs_fixest.py
@@ -146,15 +146,26 @@ def check_relative_diff(x1, x2, tol, msg=None):
test_counter_fepois = 0
test_counter_feiv = 0

+# What is tested in all tests:
+# - pyfixest vs fixest
+# - inference: iid, hetero, cluster
+# - weights: None, "weights"
+# - fmls
+# Tested only for feols, not for fepois or feiv:
+# - dropna: False, True
+# - f3_type: "str", "object", "int", "categorical", "float"
+# - adj: True
+# - cluster_adj: True


@pytest.mark.slow
@pytest.mark.parametrize("dropna", [False, True])
@pytest.mark.parametrize("inference", ["iid", "hetero", {"CRV1": "group_id"}])
@pytest.mark.parametrize("weights", [None, "weights"])
@pytest.mark.parametrize("f3_type", ["str", "object", "int", "categorical", "float"])
@pytest.mark.parametrize("fml", ols_fmls + ols_but_not_poisson_fml)
@pytest.mark.parametrize("adj", [False, True])
@pytest.mark.parametrize("cluster_adj", [False, True])
@pytest.mark.parametrize("adj", [True])
@pytest.mark.parametrize("cluster_adj", [True])
def test_single_fit_feols(
    data_feols,
    dropna,
@@ -322,12 +333,12 @@ def test_single_fit_feols_empty(


@pytest.mark.slow
@pytest.mark.parametrize("dropna", [False, True])
@pytest.mark.parametrize("dropna", [False])
@pytest.mark.parametrize("inference", ["iid", "hetero", {"CRV1": "group_id"}])
@pytest.mark.parametrize("f3_type", ["str", "object", "int", "categorical", "float"])
@pytest.mark.parametrize("f3_type", ["str"])
@pytest.mark.parametrize("fml", ols_fmls)
@pytest.mark.parametrize("adj", [False, True])
@pytest.mark.parametrize("cluster_adj", [False, True])
@pytest.mark.parametrize("adj", [True])
@pytest.mark.parametrize("cluster_adj", [True])
def test_single_fit_fepois(
    data_fepois, dropna, inference, f3_type, fml, adj, cluster_adj
):
@@ -429,13 +440,13 @@ def test_single_fit_fepois(


@pytest.mark.slow
@pytest.mark.parametrize("dropna", [False, True])
@pytest.mark.parametrize("dropna", [False])
@pytest.mark.parametrize("weights", [None, "weights"])
@pytest.mark.parametrize("inference", ["iid", "hetero", {"CRV1": "group_id"}])
@pytest.mark.parametrize("f3_type", ["str", "object", "int", "categorical", "float"])
@pytest.mark.parametrize("f3_type", ["str"])
@pytest.mark.parametrize("fml", iv_fmls)
@pytest.mark.parametrize("adj", [False, True])
@pytest.mark.parametrize("cluster_adj", [False, True])
@pytest.mark.parametrize("adj", [True])
@pytest.mark.parametrize("cluster_adj", [True])
def test_single_fit_iv(
    data_feols,
    dropna,
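To see why trimming the grids helps, count the fepois combinations per formula: the old decorators span 2 (dropna) x 3 (inference) x 5 (f3_type) x 2 (adj) x 2 (cluster_adj) = 120 cases, while the reduced grids leave 1 x 3 x 1 x 1 x 1 = 3, a 40x cut; feols retains the full dropna and f3_type grids. A quick check of the arithmetic:

from math import prod

before = prod([2, 3, 5, 2, 2])  # dropna, inference, f3_type, adj, cluster_adj
after = prod([1, 3, 1, 1, 1])   # grids after this commit
print(before, after, before // after)  # 120 3 40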
