From 3daa1447a0ca27640f814a882f985c14d578a5a0 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 14 May 2020 15:51:11 +0200 Subject: [PATCH 1/3] fix consistent bilou tagging --- rasa/nlu/utils/bilou_utils.py | 4 ++++ tests/nlu/utils/test_bilou_utils.py | 1 + 2 files changed, 5 insertions(+) diff --git a/rasa/nlu/utils/bilou_utils.py b/rasa/nlu/utils/bilou_utils.py index 65dabedf4195..71638c6557a8 100644 --- a/rasa/nlu/utils/bilou_utils.py +++ b/rasa/nlu/utils/bilou_utils.py @@ -295,6 +295,10 @@ def _find_bilou_end(start_idx: int, predicted_tags: List[Text]) -> int: start_tag = tag_without_prefix(predicted_tags[start_idx]) while not finished: + if current_idx >= len(predicted_tags): + current_idx -= 1 + break + current_label = predicted_tags[current_idx] prefix = bilou_prefix_from_tag(current_label) tag = tag_without_prefix(current_label) diff --git a/tests/nlu/utils/test_bilou_utils.py b/tests/nlu/utils/test_bilou_utils.py index 824eaf1e2662..04b80ae310cf 100644 --- a/tests/nlu/utils/test_bilou_utils.py +++ b/tests/nlu/utils/test_bilou_utils.py @@ -163,6 +163,7 @@ def test_apply_bilou_schema(): "B- tag, L- tag pair encloses multiple entity classes", ), (["O", "B-person", "O"], ["O", "U-person", "O"], "B- tag not closed"), + (["O", "B-person"], ["O", "U-person"], None), ], ) def test_check_consistent_bilou_tagging( From cbb6e733d0972f1d16bc759d6ade3f8213aab30e Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 14 May 2020 15:55:30 +0200 Subject: [PATCH 2/3] add debug log message --- rasa/nlu/utils/bilou_utils.py | 5 +++++ tests/nlu/utils/test_bilou_utils.py | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/rasa/nlu/utils/bilou_utils.py b/rasa/nlu/utils/bilou_utils.py index 71638c6557a8..d4df77d4fd49 100644 --- a/rasa/nlu/utils/bilou_utils.py +++ b/rasa/nlu/utils/bilou_utils.py @@ -296,6 +296,11 @@ def _find_bilou_end(start_idx: int, predicted_tags: List[Text]) -> int: while not finished: if current_idx >= 
len(predicted_tags): + logger.debug( + "Inconsistent BILOU tagging found, B- tag not closed by L- tag, " + "i.e [B-a, I-a, O] instead of [B-a, L-a, O].\n" + "Assuming last tag is L- instead of I-." + ) current_idx -= 1 break diff --git a/tests/nlu/utils/test_bilou_utils.py b/tests/nlu/utils/test_bilou_utils.py index 04b80ae310cf..8413568e15fe 100644 --- a/tests/nlu/utils/test_bilou_utils.py +++ b/tests/nlu/utils/test_bilou_utils.py @@ -163,7 +163,22 @@ def test_apply_bilou_schema(): "B- tag, L- tag pair encloses multiple entity classes", ), (["O", "B-person", "O"], ["O", "U-person", "O"], "B- tag not closed"), - (["O", "B-person"], ["O", "U-person"], None), + (["O", "B-person"], ["O", "U-person"], "B- tag not closed"), + ( + ["O", "B-person", "I-person"], + ["O", "B-person", "L-person"], + "B- tag not closed", + ), + ( + ["O", "B-person", "I-location"], + ["O", "B-person", "L-person"], + "B- tag not closed", + ), + ( + ["O", "B-person", "B-location"], + ["O", "U-person", "U-location"], + "B- tag not closed", + ), ], ) def test_check_consistent_bilou_tagging( From 760f0d55387119cd447a739f9382791f616c46e5 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 14 May 2020 15:57:40 +0200 Subject: [PATCH 3/3] add changelog --- changelog/5825.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/5825.bugfix.rst diff --git a/changelog/5825.bugfix.rst b/changelog/5825.bugfix.rst new file mode 100644 index 000000000000..e0ae2e95bcb2 --- /dev/null +++ b/changelog/5825.bugfix.rst @@ -0,0 +1 @@ +Fix list index out of range error in ``ensure_consistent_bilou_tagging``.