chore: bump dependencies (#982)

Co-authored-by: Bento007 <[email protected]>
Co-authored-by: Trent Smith <[email protected]>
chanzuckerberg · Aug 15, 2024 · 95d1cdf
1 parent 6a04066
commit 95d1cdf
Show file tree

Hide file tree

Showing 8 changed files with 46 additions and 64 deletions.
diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -13,7 +13,7 @@
 import scipy
 from anndata._core.sparse_dataset import SparseDataset
 from cellxgene_ontology_guide.ontology_parser import OntologyParser
-from pandas.core.computation.ops import UndefinedVariableError
+from pandas.errors import UndefinedVariableError
 from scipy import sparse
 
 from . import gencode, schema
@@ -1236,9 +1236,11 @@ def _validate_raw_data_with_in_tissue_0(
         else:  # must be dense matrix
             nonzero_row_indices = np.where(np.any(x != 0, axis=1))[0]
         for i in range(x.shape[0]):
-            if not has_tissue_0_non_zero_row and i in nonzero_row_indices and self.adata.obs["in_tissue"][i] == 0:
+            if not has_tissue_0_non_zero_row and i in nonzero_row_indices and self.adata.obs["in_tissue"].iloc[i] == 0:
                 has_tissue_0_non_zero_row = True
-            elif not has_tissue_1_zero_row and i not in nonzero_row_indices and self.adata.obs["in_tissue"][i] == 1:
+            elif (
+                not has_tissue_1_zero_row and i not in nonzero_row_indices and self.adata.obs["in_tissue"].iloc[i] == 1
+            ):
                 has_tissue_1_zero_row = True
             if has_tissue_0_non_zero_row and has_tissue_1_zero_row:
                 # exit early and report

diff --git a/cellxgene_schema_cli/cellxgene_schema/write_labels.py b/cellxgene_schema_cli/cellxgene_schema/write_labels.py
@@ -234,7 +234,7 @@ def _get_labels(
         column: str,
         column_definition: dict,
         label_type: dict,
-    ) -> pd.Categorical:
+    ) -> pd.Series:
         """
         Retrieves a new column (pandas categorical) with labels based on the IDs in 'column' and the logic in the
         'column_definition'
@@ -287,7 +287,7 @@ def _get_labels(
         else:
             raise TypeError(f"'{label_type}' is not supported in 'add-labels' functionality")
 
-        new_column = original_column.copy().replace(mapping_dict).astype("category")
+        new_column = original_column.copy().map(mapping_dict).astype("category")
 
         return new_column
 

diff --git a/cellxgene_schema_cli/requirements.txt b/cellxgene_schema_cli/requirements.txt
@@ -1,12 +1,11 @@
-anndata==0.8.0
-cellxgene-ontology-guide==1.0.0
-click==8.1.7
-Cython==3.0.10
-numpy==1.26.4
-pandas==1.4.4
-PyYaml==6.0.1
-wheel==0.43.0
-scipy<1.13.0    # scipy 1.13.0 is not compatible with anndata <=0.10.6, revisit before next release
-semver==3.0.2
-xxhash==3.4.1
-matplotlib==3.9.0
+anndata>=0.8,<0.12
+cellxgene-ontology-guide==1.0.0 # update before a schema migration
+click<9
+Cython<4
+numpy<2
+pandas>2,<3
+PyYAML<7
+scipy<2
+semver<4
+xxhash<4
+matplotlib<4
diff --git a/cellxgene_schema_cli/tests/fixtures/examples_validate.py b/cellxgene_schema_cli/tests/fixtures/examples_validate.py
@@ -81,9 +81,9 @@
     ],
 )
 
-good_obs.loc[:, ["donor_id"]] = good_obs.astype("category")
-good_obs.loc[:, ["suspension_type"]] = good_obs.astype("category")
-good_obs.loc[:, ["tissue_type"]] = good_obs.astype("category")
+good_obs["donor_id"] = good_obs["donor_id"].astype("category")
+good_obs["suspension_type"] = good_obs["suspension_type"].astype("category")
+good_obs["tissue_type"] = good_obs["tissue_type"].astype("category")
 
 # Expected obs, this is what the obs above should look like after adding the necessary columns with the validator,
 # these columns are defined in the schema
@@ -183,9 +183,9 @@
     ],
 )
 
-good_obs_visium.loc[:, ["donor_id"]] = good_obs_visium.astype("category")
-good_obs_visium.loc[:, ["suspension_type"]] = good_obs_visium.astype("category")
-good_obs_visium.loc[:, ["tissue_type"]] = good_obs_visium.astype("category")
+good_obs_visium["donor_id"] = good_obs_visium["donor_id"].astype("category")
+good_obs_visium["suspension_type"] = good_obs_visium["suspension_type"].astype("category")
+good_obs_visium["tissue_type"] = good_obs_visium["tissue_type"].astype("category")
 
 # Valid spatial obs per schema
 good_obs_slide_seqv2 = pd.DataFrame(
@@ -236,9 +236,9 @@
     ],
 )
 
-good_obs_slide_seqv2.loc[:, ["donor_id"]] = good_obs_slide_seqv2.astype("category")
-good_obs_slide_seqv2.loc[:, ["suspension_type"]] = good_obs_slide_seqv2.astype("category")
-good_obs_slide_seqv2.loc[:, ["tissue_type"]] = good_obs.astype("category")
+good_obs_slide_seqv2["donor_id"] = good_obs_slide_seqv2["donor_id"].astype("category")
+good_obs_slide_seqv2["suspension_type"] = good_obs_slide_seqv2["suspension_type"].astype("category")
+good_obs_slide_seqv2["tissue_type"] = good_obs_slide_seqv2["tissue_type"].astype("category")
 
 good_obs_visium_is_single_false = pd.DataFrame(
     [
@@ -288,9 +288,11 @@
     ],
 )
 
-good_obs_visium_is_single_false.loc[:, ["donor_id"]] = good_obs_visium_is_single_false.astype("category")
-good_obs_visium_is_single_false.loc[:, ["suspension_type"]] = good_obs_visium_is_single_false.astype("category")
-good_obs_visium_is_single_false.loc[:, ["tissue_type"]] = good_obs_visium_is_single_false.astype("category")
+good_obs_visium_is_single_false["donor_id"] = good_obs_visium_is_single_false["donor_id"].astype("category")
+good_obs_visium_is_single_false["suspension_type"] = good_obs_visium_is_single_false["suspension_type"].astype(
+    "category"
+)
+good_obs_visium_is_single_false["tissue_type"] = good_obs_visium_is_single_false["tissue_type"].astype("category")
 
 # ---
 # 2. Creating individual var components: valid object and valid object with labels

diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py
@@ -104,7 +104,7 @@ def save_and_read_adata(adata: anndata.AnnData) -> anndata.AnnData:
     """
     with tempfile.NamedTemporaryFile(suffix=".h5ad") as f:
         adata.write_h5ad(f.name)
-        return anndata.read(f.name)
+        return anndata.read_h5ad(f.name)
 
 
 class TestValidAnndata:
@@ -367,7 +367,7 @@ def test_raw_values__non_rna(self, validator_with_adata):
         validator.errors = []
         obs["assay_ontology_term_id"] = "EFO:0010891"
         obs["suspension_type"] = "nucleus"
-        obs.loc[:, ["suspension_type"]] = obs.astype("category")
+        obs["suspension_type"] = obs["suspension_type"].astype("category")
         validator.validate_adata()
         assert validator.errors == []
 
@@ -2433,19 +2433,21 @@ def test_obs_added_tissue_type_label__unknown(self, validator_with_adata):
         obs.at["Y", "tissue_type"] = "cell culture"  # Already set in example data, just setting explicitly here
         obs.at["Y", "tissue_ontology_term_id"] = "unknown"  # Testing this term case
         validator_with_adata.validate_adata()  # Validate
-        AnnDataLabelAppender(validator_with_adata)._add_labels()  # Annotate
+        labeler = AnnDataLabelAppender(validator_with_adata)
+        labeler._add_labels()  # Annotate
 
-        assert obs.at["Y", "tissue"] == "unknown"
+        assert labeler.adata.obs.at["Y", "tissue"] == "unknown"
 
     def test_obs_added_cell_type_label__unknown(self, validator_with_adata):
         obs = validator_with_adata.adata.obs
 
         # Arrange
         obs.at["Y", "cell_type_ontology_term_id"] = "unknown"  # Testing this term case
         validator_with_adata.validate_adata()  # Validate
-        AnnDataLabelAppender(validator_with_adata)._add_labels()  # Annotate
+        labeler = AnnDataLabelAppender(validator_with_adata)
+        labeler._add_labels()  # Annotate
 
-        assert obs.at["Y", "cell_type"] == "unknown"
+        assert labeler.adata.obs.at["Y", "cell_type"] == "unknown"
 
     def test_remove_unused_categories(self, label_writer, adata_with_labels):
         modified_donor_id = label_writer.adata.obs["donor_id"].cat.add_categories("donor_3")

diff --git a/cellxgene_schema_cli/tests/test_utils.py b/cellxgene_schema_cli/tests/test_utils.py
@@ -118,9 +118,9 @@ def test_remap_deprecated_features__without_raw(adata_without_raw, remapped_feat
 def test_replace_ontology_term__with_replacement(adata_with_raw, deprecated_term_map_with_replacement_match):
     replace_ontology_term(adata_with_raw.obs, "assay", deprecated_term_map_with_replacement_match)
 
-    expected = ["EFO:0009918", "EFO:0000001"]
+    expected = ["EFO:0000001", "EFO:0009918"]
     actual = adata_with_raw.obs["assay_ontology_term_id"].dtype.categories
-    assert all(a == b for a, b in zip(actual, expected))
+    assert sorted(actual) == expected
 
 
 def test_replace_ontology_term__no_replacement(adata_with_raw, deprecated_term_map_no_replacement_match):

diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py
@@ -1020,16 +1020,6 @@ def test_determine_seurat_convertibility(self):
         assert len(self.validator.warnings) == 0
         assert self.validator.is_seurat_convertible
 
-        # h5ad where raw matrix variable count != length of raw var variables array is not Seurat-convertible
-        matrix = sparse.csr_matrix(np.zeros([good_obs.shape[0], good_var.shape[0]], dtype=np.float32))
-        raw = anndata.AnnData(X=matrix, var=good_var)
-        raw.var.drop("ENSSASG00005000004", axis=0, inplace=True)
-        self.validation_helper(matrix, raw)
-        self.validator._validate_seurat_convertibility()
-        assert len(self.validator.errors) == 1
-        assert not self.validator.is_seurat_convertible
-        assert not self.validator.is_valid
-
         # Visium datasets are not Seurat-convertible
         self.validation_helper(sparse_matrix_with_zero)
         self.validator.adata.obs = adata_visium.obs.copy()
@@ -1066,19 +1056,6 @@ def test_fail_categorical_mixed_types(self, tmp_path, valid_adata):
         assert "in dataframe 'obs' contains 2 categorical types. Only one type is allowed." in validator.errors[0]
         self._fail_write_h5ad(tmp_path, valid_adata)
 
-    def test_fail_categorical_bool(self, tmp_path, valid_adata):
-        # Arrange
-        categories = [True, False]
-        self._add_catagorical_obs(valid_adata, categories)
-        validator = self._create_validator(valid_adata)
-
-        # Act
-        validator._validate_dataframe("obs")
-
-        # Assert
-        assert "in dataframe 'obs' contains illegal_categorical_types={<class 'bool'>}." in validator.errors[0]
-        self._fail_write_h5ad(tmp_path, valid_adata)
-
     def _add_catagorical_obs(self, adata, categories):
         t = pd.CategoricalDtype(categories=categories)
         adata.obs["test_cat"] = pd.Series(data=categories, index=["X", "Y"], dtype=t)
@@ -1131,11 +1108,11 @@ def create_validator(data: Union[ndarray, spmatrix], matrix_format: str) -> Vali
         "data, matrix_format, expected_result",
         [
             # Test case with integer values in a dense matrix
-            (np.array([[1, 2, 3], [4, 5, 6]], dtype=int), "dense", True),
+            (np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32), "dense", True),
             # Test case with float values in a dense matrix
             (np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]]), "dense", False),
             # Test case with integer values in a sparse matrix (CSR format)
-            (sparse.csr_matrix([[1, 0, 3], [0, 5, 0]], dtype=int), "csr", True),
+            (sparse.csr_matrix([[1, 0, 3], [0, 5, 0]], dtype=np.float32), "csr", True),
             # Test case with float values in a sparse matrix (CSC format)
             (sparse.csc_matrix([[1.1, 0, 3.3], [0, 5.5, 0]]), "csc", False),
             # Test case with mixed integer and float values in a dense matrix
@@ -1148,7 +1125,7 @@ def test_has_valid_raw(self, data, matrix_format, expected_result):
 
     @mock.patch("cellxgene_schema.validate.get_matrix_format", return_value="unknown")
     def test_has_valid_raw_with_unknown_format(self, mock_get_matrix_format):
-        data = np.array([[1, 2, 3], [4, 5, 6]], dtype=int)
+        data = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
         validator = self.create_validator(data, "unknown")
         with pytest.raises(AssertionError):
             validator._has_valid_raw()
diff --git a/scripts/schema_bump_dry_run_genes/requirements.txt b/scripts/schema_bump_dry_run_genes/requirements.txt
@@ -1,5 +1,5 @@
 requests==2.32.0
-tiledb==0.21.4 # Should match version pinned in single-cell-data-portal
+tiledb==0.30.2 # Should match version pinned in single-cell-data-portal
 pandas==2.2.2
 pyarrow>=1.0.0
 jinja2<4