From 11fb8a4ee3615a522fc2c802adc2271b11e49cb5 Mon Sep 17 00:00:00 2001 From: Sunny Sun <38218185+sunnyosun@users.noreply.github.com> Date: Thu, 5 Dec 2024 08:15:52 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fix=20describing=20no=20label=20?= =?UTF-8?q?features=20(#2253)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_curate.py | 21 ++++++++------ lamindb/core/_feature_manager.py | 48 +++++++++++++++++++++++--------- tests/core/test_describe_df.py | 2 ++ 3 files changed, 49 insertions(+), 22 deletions(-) diff --git a/lamindb/_curate.py b/lamindb/_curate.py index 6b837a40e..a4f0575e0 100644 --- a/lamindb/_curate.py +++ b/lamindb/_curate.py @@ -1449,7 +1449,9 @@ def standardize(self, key: str): df = table.to_pandas() # map values - df[slot_key] = df[slot_key].map(lambda val: syn_mapper.get(val, val)) # noqa: B023 + df[slot_key] = df[slot_key].map( + lambda val: syn_mapper.get(val, val) # noqa + ) # write the mapped values with _open_tiledbsoma(self._experiment_uri, mode="w") as experiment: slot(experiment).write(pa.Table.from_pandas(df, schema=table.schema)) @@ -2044,16 +2046,17 @@ def _add_labels( ) if len(labels) == 0: continue + label_ref_is_name = None if hasattr(registry, "_name_field"): label_ref_is_name = field.field.name == registry._name_field - add_labels( - artifact, - records=labels, - feature=feature, - feature_ref_is_name=feature_ref_is_name, - label_ref_is_name=label_ref_is_name, - from_curator=True, - ) + add_labels( + artifact, + records=labels, + feature=feature, + feature_ref_is_name=feature_ref_is_name, + label_ref_is_name=label_ref_is_name, + from_curator=True, + ) if artifact._accessor == "MuData": for modality, modality_fields in fields.items(): diff --git a/lamindb/core/_feature_manager.py b/lamindb/core/_feature_manager.py index b5b485632..541dc8509 100644 --- a/lamindb/core/_feature_manager.py +++ b/lamindb/core/_feature_manager.py @@ -347,15 +347,15 @@ def describe_features( for feature_name in feature_names: feature_data[feature_name] = (slot, feature_set.registry) - internal_feature_names: set[str] = {} # type: ignore + internal_feature_names: dict[str, str] = {} if isinstance(self, Artifact): feature_sets = self.feature_sets.filter(registry="Feature").all() - internal_feature_names = set() # type: ignore + internal_feature_names = {} if len(feature_sets) > 0: for feature_set in feature_sets: - internal_feature_names = internal_feature_names.union( - set(feature_set.members.values_list("name", flat=True)) - ) # type: ignore + internal_feature_names.update( + dict(feature_set.members.values_list("name", "dtype")) + ) # categorical feature values # Get the categorical data using the appropriate method @@ -410,20 +410,42 @@ def describe_features( return dictionary # Internal features section - internal_features_slot: dict[ - str, list - ] = {} # internal features from the `Feature` registry that contain labels + # internal features that contain labels (only `Feature` features contain labels) + internal_feature_labels_slot: dict[str, list] = {} for feature_name, feature_row in internal_feature_labels.items(): slot, _ = feature_data.get(feature_name) - internal_features_slot.setdefault(slot, []).append(feature_row) - int_features_tree_children = [] + internal_feature_labels_slot.setdefault(slot, []).append(feature_row) + int_features_tree_children = [] for slot, (feature_set, feature_names) in feature_set_data.items(): - if slot in internal_features_slot: - feature_rows = internal_features_slot[slot] + if slot in internal_feature_labels_slot: + # add internal Feature features with labels + feature_rows = internal_feature_labels_slot[slot] + # add internal Feature features without labels + feature_rows += [ + ( + feature_name, + Text(str(internal_feature_names.get(feature_name)), style="dim"), + "", + ) + for feature_name in feature_names + if feature_name and feature_name not in internal_feature_labels + ] else: + # add internal non-Feature features without labels feature_rows = [ - (feature_name, Text(str(feature_set.dtype), style="dim"), "") + ( + feature_name, + Text( + str( + internal_feature_names.get(feature_name) + if feature_name in internal_feature_names + else feature_set.dtype + ), + style="dim", + ), + "", + ) for feature_name in feature_names if feature_name ] diff --git a/tests/core/test_describe_df.py b/tests/core/test_describe_df.py index d334a197b..39f7f069c 100644 --- a/tests/core/test_describe_df.py +++ b/tests/core/test_describe_df.py @@ -179,6 +179,7 @@ def test_curate_df(): "cell_medium", "cell_type_by_expert", "cell_type_by_model", + "sample_note", ] assert int_features_node.children[1].label.columns[1].header.plain == "[Feature]" assert ( @@ -196,6 +197,7 @@ def test_curate_df(): "DMSO, IFNG", "B cell, T cell", "B cell, T cell", + "", ] # external features section