Skip to content

Commit

Permalink
🐛 Fix describing no label features (#2253)
Browse files: browse the repository at this point in the history
  • Loading branch information
sunnyosun authored Dec 5, 2024
1 parent 6e26a52 commit 11fb8a4
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 22 deletions.
21 changes: 12 additions & 9 deletions lamindb/_curate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1449,7 +1449,9 @@ def standardize(self, key: str):

df = table.to_pandas()
# map values
df[slot_key] = df[slot_key].map(lambda val: syn_mapper.get(val, val)) # noqa: B023
df[slot_key] = df[slot_key].map(
lambda val: syn_mapper.get(val, val) # noqa
)
# write the mapped values
with _open_tiledbsoma(self._experiment_uri, mode="w") as experiment:
slot(experiment).write(pa.Table.from_pandas(df, schema=table.schema))
Expand Down Expand Up @@ -2044,16 +2046,17 @@ def _add_labels(
)
if len(labels) == 0:
continue
label_ref_is_name = None
if hasattr(registry, "_name_field"):
label_ref_is_name = field.field.name == registry._name_field
add_labels(
artifact,
records=labels,
feature=feature,
feature_ref_is_name=feature_ref_is_name,
label_ref_is_name=label_ref_is_name,
from_curator=True,
)
add_labels(
artifact,
records=labels,
feature=feature,
feature_ref_is_name=feature_ref_is_name,
label_ref_is_name=label_ref_is_name,
from_curator=True,
)

if artifact._accessor == "MuData":
for modality, modality_fields in fields.items():
Expand Down
48 changes: 35 additions & 13 deletions lamindb/core/_feature_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,15 +347,15 @@ def describe_features(
for feature_name in feature_names:
feature_data[feature_name] = (slot, feature_set.registry)

internal_feature_names: set[str] = {} # type: ignore
internal_feature_names: dict[str, str] = {}
if isinstance(self, Artifact):
feature_sets = self.feature_sets.filter(registry="Feature").all()
internal_feature_names = set() # type: ignore
internal_feature_names = {}
if len(feature_sets) > 0:
for feature_set in feature_sets:
internal_feature_names = internal_feature_names.union(
set(feature_set.members.values_list("name", flat=True))
) # type: ignore
internal_feature_names.update(
dict(feature_set.members.values_list("name", "dtype"))
)

# categorical feature values
# Get the categorical data using the appropriate method
Expand Down Expand Up @@ -410,20 +410,42 @@ def describe_features(
return dictionary

# Internal features section
internal_features_slot: dict[
str, list
] = {} # internal features from the `Feature` registry that contain labels
# internal features that contain labels (only `Feature` features contain labels)
internal_feature_labels_slot: dict[str, list] = {}
for feature_name, feature_row in internal_feature_labels.items():
slot, _ = feature_data.get(feature_name)
internal_features_slot.setdefault(slot, []).append(feature_row)
int_features_tree_children = []
internal_feature_labels_slot.setdefault(slot, []).append(feature_row)

int_features_tree_children = []
for slot, (feature_set, feature_names) in feature_set_data.items():
if slot in internal_features_slot:
feature_rows = internal_features_slot[slot]
if slot in internal_feature_labels_slot:
# add internal Feature features with labels
feature_rows = internal_feature_labels_slot[slot]
# add internal Feature features without labels
feature_rows += [
(
feature_name,
Text(str(internal_feature_names.get(feature_name)), style="dim"),
"",
)
for feature_name in feature_names
if feature_name and feature_name not in internal_feature_labels
]
else:
# add internal non-Feature features without labels
feature_rows = [
(feature_name, Text(str(feature_set.dtype), style="dim"), "")
(
feature_name,
Text(
str(
internal_feature_names.get(feature_name)
if feature_name in internal_feature_names
else feature_set.dtype
),
style="dim",
),
"",
)
for feature_name in feature_names
if feature_name
]
Expand Down
2 changes: 2 additions & 0 deletions tests/core/test_describe_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ def test_curate_df():
"cell_medium",
"cell_type_by_expert",
"cell_type_by_model",
"sample_note",
]
assert int_features_node.children[1].label.columns[1].header.plain == "[Feature]"
assert (
Expand All @@ -196,6 +197,7 @@ def test_curate_df():
"DMSO, IFNG",
"B cell, T cell",
"B cell, T cell",
"",
]

# external features section
Expand Down

0 comments on commit 11fb8a4

Please sign in to comment.