diff --git a/.github/meta.yaml b/.github/meta.yaml index 18e8db60e0..287d81b007 100644 --- a/.github/meta.yaml +++ b/.github/meta.yaml @@ -27,14 +27,14 @@ outputs: - numpy >=1.22.0 - pandas >=1.5.0, <2.1.0 - dask >=2022.2.0, !=2022.10.1 - - scipy >=1.5.0, <1.12.0 + - scipy >=1.5.0 - scikit-learn >=1.3.2 - scikit-optimize >=0.9.0 - statsmodels >=0.12.2 - colorama >=0.4.4 - cloudpickle >=1.5.0 - click >=8.0.0 - - shap >=0.42.0, <0.45.0 + - shap >=0.45.0 - texttable >=1.6.2 - woodwork >=0.22.0 - featuretools >=1.16.0 diff --git a/.github/workflows/latest_dependency_checker.yaml b/.github/workflows/latest_dependency_checker.yaml index f38404501c..ac52c9d602 100644 --- a/.github/workflows/latest_dependency_checker.yaml +++ b/.github/workflows/latest_dependency_checker.yaml @@ -32,4 +32,4 @@ jobs: delete-branch: true base: main assignees: machineFL - reviewers: jeremyliweishih, chukarsten, MichaelFu512, eccabay, christopherbunn + reviewers: jeremyliweishih, thehomebrewnerd, MichaelFu512, eccabay, christopherbunn diff --git a/.github/workflows/lint_tests.yaml b/.github/workflows/lint_tests.yaml index ec3dc42245..b26523ff7c 100644 --- a/.github/workflows/lint_tests.yaml +++ b/.github/workflows/lint_tests.yaml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python_version: ["3.9", "3.10"] + python_version: ["3.9", "3.10", "3.11"] steps: - name: Checkout repository uses: actions/checkout@v3 diff --git a/.github/workflows/linux_unit_tests_with_latest_deps.yaml b/.github/workflows/linux_unit_tests_with_latest_deps.yaml index ced7f343c0..bb5990734f 100644 --- a/.github/workflows/linux_unit_tests_with_latest_deps.yaml +++ b/.github/workflows/linux_unit_tests_with_latest_deps.yaml @@ -14,17 +14,8 @@ jobs: strategy: fail-fast: false matrix: - include: - - python_version: "3.9" - command: 'git-test-automl' - - python_version: "3.9" - command: 'git-test-prophet' - - python_version: "3.9" - command: 'git-test-modelunderstanding' - - python_version: "3.9" - command: 'git-test-other' - - python_version: "3.9" - command: 'git-test-parallel' + python_version: ['3.9', '3.10', '3.11'] + command: ['git-test-automl', 'git-test-modelunderstanding', 'git-test-parallel', 'git-test-prophet', 'git-test-other'] steps: - name: Set up Python ${{ matrix.python_version }} uses: actions/setup-python@v4 diff --git a/.github/workflows/linux_unit_tests_with_minimum_deps.yaml b/.github/workflows/linux_unit_tests_with_minimum_deps.yaml index ade8d2cb6a..8fa37594cf 100644 --- a/.github/workflows/linux_unit_tests_with_minimum_deps.yaml +++ b/.github/workflows/linux_unit_tests_with_minimum_deps.yaml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: python_version: ['3.9'] - command: ['git-test-automl', 'git-test-modelunderstanding', 'git-test-other', 'git-test-parallel', 'git-test-prophet'] + command: ['git-test-automl', 'git-test-modelunderstanding', 'git-test-parallel', 'git-test-prophet', 'git-test-other'] steps: - name: Set up Python ${{ matrix.python_version }} uses: actions/setup-python@v4 diff --git a/.github/workflows/windows_unit_tests.yaml b/.github/workflows/windows_unit_tests.yaml index c6cd519c24..b7b929c1b5 100644 --- a/.github/workflows/windows_unit_tests.yaml +++ b/.github/workflows/windows_unit_tests.yaml @@ -15,7 +15,7 @@ jobs: fail-fast: false matrix: python_version: ['3.9'] - command: ['git-test-automl', 'git-test-modelunderstanding', 'git-test-other', 'git-test-parallel', 'git-test-prophet'] + command: ['git-test-automl', 'git-test-modelunderstanding', 'git-test-parallel', 'git-test-prophet-no-parallel-cpu', 'git-test-other-no-parallel-cpu'] steps: - name: Download Miniconda shell: pwsh @@ -59,7 +59,7 @@ jobs: . $env:USERPROFILE\Miniconda3\shell\condabin\conda-hook.ps1 conda activate curr_py conda install numba -q -y - - if: ${{ matrix.command == 'git-test-prophet' }} + - if: ${{ matrix.command == 'git-test-prophet-no-parallel-cpu' }} name: Install EvalML with test requirements and prophet shell: pwsh run: | @@ -69,7 +69,7 @@ jobs: python -m pip install .[test] python -m pip install .[prophet] pip freeze - - if: ${{ matrix.command != 'git-test-prophet' }} + - if: ${{ matrix.command != 'git-test-prophet-no-parallel-cpu' }} name: Install EvalML with test requirements shell: pwsh run: | diff --git a/Makefile b/Makefile index df2da9c889..89a481a367 100644 --- a/Makefile +++ b/Makefile @@ -63,6 +63,14 @@ git-test-prophet: git-test-integration: pytest evalml/tests/integration_tests -n 2 --durations 0 --timeout $(TIMEOUT) --cov=evalml --cov-config=./pyproject.toml --junitxml=test-reports/git-test-integration-junit.xml +.PHONY: git-test-other-no-parallel-cpu +git-test-other-no-parallel-cpu: + pytest evalml/tests --ignore evalml/tests/automl_tests/ --ignore evalml/tests/tuner_tests/ --ignore evalml/tests/model_understanding_tests/ --ignore evalml/tests/pipeline_tests/test_pipelines.py --ignore evalml/tests/component_tests/test_prophet_regressor.py --ignore evalml/tests/component_tests/test_components.py --ignore evalml/tests/component_tests/test_utils.py --ignore evalml/tests/integration_tests/ --durations 0 --timeout $(TIMEOUT) --cov=evalml --cov-config=./pyproject.toml --junitxml=test-reports/git-test-other-junit.xml + make doctests + +.PHONY: git-test-prophet-no-parallel-cpu +git-test-prophet-no-parallel-cpu: + pytest evalml/tests/component_tests/test_prophet_regressor.py evalml/tests/component_tests/test_components.py evalml/tests/component_tests/test_utils.py evalml/tests/pipeline_tests/test_pipelines.py --durations 0 --timeout $(TIMEOUT) --cov=evalml --cov-config=./pyproject.toml --junitxml=test-reports/git-test-prophet-junit.xml .PHONY: installdeps installdeps: diff --git a/core-requirements.txt b/core-requirements.txt index 6a7f4dc3ed..6486d20a3b 100644 --- a/core-requirements.txt +++ b/core-requirements.txt @@ -1,13 +1,13 @@ numpy>=1.21.0 pandas>=1.5.0, <2.1.0 -scipy>=1.5.0, <1.12.0 +scipy>=1.5.0 scikit-learn>=1.3.2 scikit-optimize>=0.9.0 pyzmq>=20.0.0 colorama>=0.4.4 cloudpickle>=1.5.0 click>=8.0.0 -shap>=0.42.0 +shap>=0.45.0 statsmodels>=0.12.2 texttable>=1.6.2 woodwork>= 0.21.1 diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 6934ef6327..d64ca62024 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -9,12 +9,13 @@ Release Notes * Removed vowpalwabbit :pr:`4427` * Uncapped holidays :pr:`4428` * Unpinned kaleido :pr:`4423` + * Unpinned shap and scipy :pr:`4436` * Documentation Changes * Testing Changes - * Run airflow tests in Python 3.9 :pr:`4391` - * Remove iterative test from airflow runs :pr:`4424` - * Update GH actions to improve handling of potentially unsafe variables :pr:`4417` - * Fix install test :pr:`4423` + * Added ability to run airflow tests in Python 3.9 :pr:`4391` + * Removed iterative test from airflow runs :pr:`4424` + * Updated GH actions to improve handling of potentially unsafe variables :pr:`4417` + * Fixed install test :pr:`4423` .. warning:: diff --git a/evalml/data_checks/target_distribution_data_check.py b/evalml/data_checks/target_distribution_data_check.py index 4ac085ccaa..a06b1e556c 100644 --- a/evalml/data_checks/target_distribution_data_check.py +++ b/evalml/data_checks/target_distribution_data_check.py @@ -147,7 +147,8 @@ def _detect_log_distribution_helper(y): normalization_test = shapiro if len(y) <= 5000 else jarque_bera normalization_test_string = "shapiro" if len(y) <= 5000 else "jarque_bera" # Check if a normal distribution is detected with p-value above 0.05 - if normalization_test(y).pvalue >= 0.05: + pvalue = normalization_test(y).pvalue + if pvalue >= 0.05 or np.isnan(pvalue): return False, normalization_test_string, None y_new = round(y, 6) @@ -161,6 +162,6 @@ def _detect_log_distribution_helper(y): # If the p-value of the log transformed target is greater than or equal to the p-value of the original target # with outliers dropped, then it would imply that the log transformed target has more of a normal distribution - if norm_test_log.pvalue >= norm_test_og.pvalue: + if round(norm_test_log.pvalue, 6) >= round(norm_test_og.pvalue, 6): return True, normalization_test_string, norm_test_og return False, normalization_test_string, norm_test_og diff --git a/evalml/model_understanding/prediction_explanations/_algorithms.py b/evalml/model_understanding/prediction_explanations/_algorithms.py index 6b87a92128..1a7389ad22 100644 --- a/evalml/model_understanding/prediction_explanations/_algorithms.py +++ b/evalml/model_understanding/prediction_explanations/_algorithms.py @@ -144,15 +144,6 @@ def _compute_shap_values(pipeline, features, training_data=None): if ws: logger.debug(f"_compute_shap_values TreeExplainer: {ws[0].message}") shap_values = explainer.shap_values(features, check_additivity=False) - # shap only outputs values for positive class for Catboost/Xgboost binary estimators. - # this modifies the output to match the output format of other binary estimators. - # Ok to fill values of negative class with zeros since the negative class will get dropped - # in the UI anyways. - if estimator.model_family in { - ModelFamily.CATBOOST, - ModelFamily.XGBOOST, - } and is_binary(pipeline.problem_type): - shap_values = [np.zeros(shap_values.shape), shap_values] else: if training_data is None: raise ValueError( @@ -189,16 +180,29 @@ def _compute_shap_values(pipeline, features, training_data=None): except IndexError: expected_value = explainer.expected_value - # classification problem - if isinstance(shap_values, list): - mappings = [] - for class_shap_values in shap_values: - mappings.append(_create_dictionary(class_shap_values, feature_names)) - return (mappings, expected_value) # regression problem - elif isinstance(shap_values, np.ndarray): + if is_regression(pipeline.problem_type): dic = _create_dictionary(shap_values, feature_names) return (dic, expected_value) + # classification problem + if len(shap_values.shape) == 3: + mappings = [] + for class_shap_values in shap_values.T: + mappings.append(_create_dictionary(class_shap_values.T, feature_names)) + return (mappings, expected_value) + # shap only outputs values for positive class for boosted binary estimators. + # this modifies the output to match the output format of other binary estimators. + # Ok to fill values of negative class with the positive class since the negative class + # will get dropped in the UI anyways. + if estimator.model_family in { + ModelFamily.CATBOOST, + ModelFamily.XGBOOST, + ModelFamily.LIGHTGBM, + } and is_binary(pipeline.problem_type): + mappings = [] + for _ in range(2): + mappings.append(_create_dictionary(shap_values, feature_names)) + return (mappings, expected_value) else: raise ValueError(f"Unknown shap_values datatype {str(type(shap_values))}!") diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py index d26ad9ba02..155d131fc2 100644 --- a/evalml/tests/automl_tests/test_automl.py +++ b/evalml/tests/automl_tests/test_automl.py @@ -1530,9 +1530,12 @@ def test_results_getter(AutoMLTestEnv, X_y_binary): assert automl.results["pipeline_results"][0]["mean_cv_score"] == 1.0 - with pytest.raises(AttributeError, match="set attribute"): + with pytest.raises(AttributeError) as atr_error: automl.results = 2.0 + assert "set attribute" in str(atr_error.value) or "has no setter" in str( + atr_error.value, + ) automl.results["pipeline_results"][0]["mean_cv_score"] = 2.0 assert automl.results["pipeline_results"][0]["mean_cv_score"] == 1.0 @@ -4850,7 +4853,7 @@ def test_search_parameters_held_automl( max_batches=batches, ) aml.search() - estimator_args = inspect.getargspec(RandomForestClassifier) + estimator_args = inspect.getfullargspec(RandomForestClassifier) # estimator_args[0] gives the parameter names, while [3] gives the associated values # estimator_args[0][i + 1] to skip 'self' in the estimator # we do len - 1 in order to skip the random seed, which isn't present in the row['parameters'] diff --git a/evalml/tests/dependency_update_check/latest_dependency_versions.txt b/evalml/tests/dependency_update_check/latest_dependency_versions.txt index 6470fd905b..196653e209 100644 --- a/evalml/tests/dependency_update_check/latest_dependency_versions.txt +++ b/evalml/tests/dependency_update_check/latest_dependency_versions.txt @@ -27,9 +27,9 @@ pmdarima==2.0.4 pyzmq==26.0.3 scikit-learn==1.4.2 scikit-optimize==0.10.1 -scipy==1.11.4 +scipy==1.13.1 seaborn==0.13.2 -shap==0.44.1 +shap==0.45.1 sktime==0.28.1 statsmodels==0.14.2 texttable==1.7.0 diff --git a/evalml/tests/dependency_update_check/minimum_requirements.txt b/evalml/tests/dependency_update_check/minimum_requirements.txt index b00f83e04f..0b059e6bee 100644 --- a/evalml/tests/dependency_update_check/minimum_requirements.txt +++ b/evalml/tests/dependency_update_check/minimum_requirements.txt @@ -27,7 +27,7 @@ scikit-learn==1.3.2 scikit-optimize==0.9.0 scipy==1.5.0 seaborn==0.11.1 -shap==0.42.0 +shap==0.45.0 sktime==0.21.0 statsmodels==0.12.2 texttable==1.6.2 diff --git a/evalml/tests/dependency_update_check/minimum_test_requirements.txt b/evalml/tests/dependency_update_check/minimum_test_requirements.txt index 49744a8cb7..2be6d022c7 100644 --- a/evalml/tests/dependency_update_check/minimum_test_requirements.txt +++ b/evalml/tests/dependency_update_check/minimum_test_requirements.txt @@ -35,7 +35,7 @@ scikit-learn==1.3.2 scikit-optimize==0.9.0 scipy==1.5.0 seaborn==0.11.1 -shap==0.42.0 +shap==0.45.0 sktime==0.21.0 statsmodels==0.12.2 texttable==1.6.2 diff --git a/evalml/tests/model_understanding_tests/prediction_explanations_tests/test_algorithms.py b/evalml/tests/model_understanding_tests/prediction_explanations_tests/test_algorithms.py index a5f0c10c74..ccbb43040f 100644 --- a/evalml/tests/model_understanding_tests/prediction_explanations_tests/test_algorithms.py +++ b/evalml/tests/model_understanding_tests/prediction_explanations_tests/test_algorithms.py @@ -332,7 +332,7 @@ def test_compute_shap_values_catches_shap_tree_warnings( def raise_warning_from_shap(estimator, feature_perturbation): warnings.warn("Shap raised a warning!") mock = MagicMock() - mock.shap_values.return_value = np.zeros(10) + mock.shap_values.return_value = np.zeros((1, 10, 2)) return mock mock_tree_explainer.side_effect = raise_warning_from_shap diff --git a/pyproject.toml b/pyproject.toml index 2da1e0473a..e1512e9787 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,14 +31,14 @@ requires-python = ">=3.9,<4" dependencies = [ "numpy >= 1.22.0", "pandas >= 1.5.0, <2.1.0", - "scipy >= 1.5.0, < 1.12.0", + "scipy >= 1.5.0", "scikit-learn >= 1.3.2", "scikit-optimize >= 0.9.0", "pyzmq >= 20.0.0", "colorama >= 0.4.4", "cloudpickle >= 1.5.0", "click >= 8.0.0", - "shap >= 0.42.0, < 0.45.0", + "shap >= 0.45.0", "statsmodels >= 0.12.2", "texttable >= 1.6.2", "woodwork[dask] >= 0.22.0", @@ -78,9 +78,9 @@ dependencies = [ [project.optional-dependencies] test = [ "pytest == 7.1.2", - "pytest-xdist == 2.1.0", - "pytest-timeout == 1.4.2", - "pytest-cov == 2.10.1", + "pytest-xdist >= 2.1.0", + "pytest-timeout >= 1.4.2", + "pytest-cov >= 2.10.1", "nbval == 0.9.3", "IPython >= 8.10.0, <8.12.1", "PyYAML == 6.0.1",