Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add greedy labels resolver #2064

Merged
merged 5 commits into from
May 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ All notable changes to this project will be documented in this file.
- Support direct annotation input for COCO format (<https://github.com/openvinotoolkit/training_extensions/pull/1921>)
- Action task supports multi GPU training. (<https://github.com/openvinotoolkit/training_extensions/pull/2057>)
- Support storage cache in Apache Arrow using Datumaro for action tasks (<https://github.com/openvinotoolkit/training_extensions/pull/2087>)
- Add a simplified greedy labels postprocessing for hierarchical classification (<https://github.com/openvinotoolkit/training_extensions/pull/2064>).

### Enhancements

Expand Down
62 changes: 62 additions & 0 deletions otx/api/entities/label_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,68 @@ def from_labels(cls, labels: Sequence[LabelEntity]) -> "LabelSchemaEntity":
label_group = LabelGroup(name="from_label_list", labels=labels)
return LabelSchemaEntity(label_groups=[label_group])

def resolve_labels_greedily(self, scored_labels: List[ScoredLabel]) -> List[ScoredLabel]:
    """Resolves hierarchical labels and exclusivity based on a list of ScoredLabels (labels with probability).

    The resolution is done in two greedy steps:

    - pick the most likely label from every label group (single-label groups
      contribute their only label unconditionally)
    - keep a picked label only if its whole ancestor chain was also picked,
      emitting ancestors before the label itself.

    Args:
        scored_labels (List[ScoredLabel]): list of labels to resolve

    Returns:
        List[ScoredLabel]: List of ScoredLabels (labels with probability)
    """

    def chain_with_ancestors(label: LabelEntity, selected: List[LabelEntity]) -> List[LabelEntity]:
        """Return [parent, grandparent, ..., label] if every ancestor is in `selected`.

        A root label (no parent) yields just [label]; if any ancestor was not
        selected, an empty list is returned and the label is dropped.
        """
        parent = self.get_parent(label)
        if parent is None:
            return [label]

        chain = []
        while parent is not None:
            if parent not in selected:
                return []
            chain.append(parent)
            parent = self.get_parent(parent)
        chain.append(label)
        return chain

    # Probability lookup for every known label; unseen labels default to 0.0.
    probability_of = {label: 0.0 for label in self.get_labels(include_empty=True)}
    for scored in scored_labels:
        probability_of[scored.label] = scored.probability

    # Step 1: per-group winners (strictly positive probability required
    # for multi-label groups, first maximum wins on ties).
    selected = []
    for group in self.get_groups():
        if group.is_single_label():
            selected.append(group.labels[0])
            continue
        winner = None
        best_probability = 0.0
        for label in group.labels:
            if probability_of[label] > best_probability:
                best_probability = probability_of[label]
                winner = label
        if winner is not None:
            selected.append(winner)

    # Step 2: keep winners whose full ancestry was also selected,
    # preserving first-seen order and avoiding duplicates.
    resolved: List[LabelEntity] = []
    for label in selected:
        if label in resolved:
            continue
        for member in chain_with_ancestors(label, selected):
            if member not in resolved:
                resolved.append(member)

    return [ScoredLabel(label, probability_of[label]) for label in resolved]

def resolve_labels_probabilistic(
self,
scored_labels: List[ScoredLabel],
Expand Down
77 changes: 70 additions & 7 deletions tests/unit/api/entities/test_label_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1780,17 +1780,17 @@ def test_label_schema_from_labels(self):
@pytest.mark.priority_medium
@pytest.mark.unit
@pytest.mark.reqids(Requirements.REQ_1)
def test_label_schema_resolve_labels_probabilistic(self):
def test_label_schema_resolve_labels(self):
"""
<b>Description:</b>
Check LabelSchemaEntity class resolve_labels_probabilistic method
Check LabelSchemaEntity label resolving algorithms

<b>Input data:</b>
LabelSchemaEntity objects with specified label_tree and label_groups parameters

<b>Expected results:</b>
Test passes if LabelSchemaEntity object returned by resolve_labels_probabilistic
method is equal expected
Test passes if labels list returned by resolving methods
is equal expected
"""
label_schema = LabelSchemaEntity()
labels_1 = [
Expand Down Expand Up @@ -1838,10 +1838,15 @@ def test_label_schema_resolve_labels_probabilistic(self):
ScoredLabel(labels_2[1], 0.5),
]
resloved_labels = label_schema.resolve_labels_probabilistic(predicted_labels)
assert [

ref_labels = [
ScoredLabel(labels_1[1], 0.5),
ScoredLabel(labels_2[1], 0.5),
] == resloved_labels
]
assert ref_labels == resloved_labels

resloved_labels_greedy = label_schema.resolve_labels_greedily(predicted_labels)
assert ref_labels == resloved_labels_greedy

# supress children of non-maximum labels
predicted_labels = [
Expand All @@ -1850,4 +1855,62 @@ def test_label_schema_resolve_labels_probabilistic(self):
ScoredLabel(labels_3[0], 0.4),
]
resloved_labels = label_schema.resolve_labels_probabilistic(predicted_labels)
assert [ScoredLabel(labels_2[1], 0.5)] == resloved_labels
ref_labels = [ScoredLabel(labels_2[1], 0.5)]
assert ref_labels == resloved_labels

resloved_labels_greedy = label_schema.resolve_labels_greedily(predicted_labels)
assert ref_labels == resloved_labels_greedy

@pytest.mark.priority_medium
@pytest.mark.unit
@pytest.mark.reqids(Requirements.REQ_1)
def test_label_schema_resolve_labels_greedy(self):
    """
    <b>Description:</b>
    Check LabelSchemaEntity label greedy resolving algorithm

    <b>Input data:</b>
    LabelSchemaEntity objects with specified label_tree and label_groups parameters

    <b>Expected results:</b>
    Test passes if labels list returned by resolving method
    is equal expected
    """

    label_schema = LabelSchemaEntity()
    g1_labels = [
        LabelEntity("A", Domain.CLASSIFICATION),
        LabelEntity("B", Domain.CLASSIFICATION),
        LabelEntity("C", Domain.CLASSIFICATION),
    ]
    g2_labels = [LabelEntity("D", Domain.CLASSIFICATION), LabelEntity("E", Domain.CLASSIFICATION)]
    g3_labels = [LabelEntity("F", Domain.CLASSIFICATION), LabelEntity("G", Domain.CLASSIFICATION)]
    g4_labels = [LabelEntity("H", Domain.CLASSIFICATION)]

    label_schema.add_group(LabelGroup(name="labels1", labels=g1_labels, group_type=LabelGroupType.EXCLUSIVE))
    label_schema.add_group(LabelGroup(name="labels2", labels=g2_labels, group_type=LabelGroupType.EXCLUSIVE))
    label_schema.add_group(LabelGroup(name="labels3", labels=g3_labels, group_type=LabelGroupType.EXCLUSIVE))
    label_schema.add_group(LabelGroup(name="labels4", labels=g4_labels, group_type=LabelGroupType.EXCLUSIVE))

    # Hierarchy: A is parent of {D, E}; B is parent of {F, G}; C and H are roots.
    label_schema.add_child(g1_labels[0], g2_labels[0])
    label_schema.add_child(g1_labels[0], g2_labels[1])

    label_schema.add_child(g1_labels[1], g3_labels[0])
    label_schema.add_child(g1_labels[1], g3_labels[1])

    predicted_labels = [
        ScoredLabel(g1_labels[0], 0.6),
        ScoredLabel(g1_labels[1], 0.3),
        ScoredLabel(g1_labels[2], 0.1),
        ScoredLabel(g2_labels[0], 0.2),
        ScoredLabel(g2_labels[1], 0.8),
        ScoredLabel(g3_labels[0], 0.7),
        ScoredLabel(g3_labels[1], 0.3),
        ScoredLabel(g4_labels[0], 0.9),
    ]

    # A wins group 1, E wins group 2 (parent A also selected, so E is kept),
    # F wins group 3 but its parent B lost group 1, so F is suppressed;
    # H is a single-label group and is always kept.
    ref_labels = [
        ScoredLabel(g1_labels[0], 0.6),
        ScoredLabel(g2_labels[1], 0.8),
        ScoredLabel(g4_labels[0], 0.9),
    ]

    assert ref_labels == label_schema.resolve_labels_greedily(predicted_labels)