From 3ba13f484c35641910d21362575ff16b71261f0f Mon Sep 17 00:00:00 2001 From: Seunghan-Jung Date: Sat, 28 Sep 2024 22:52:36 +0900 Subject: [PATCH 1/6] Fix bug 'DefaultPassageFormatter.format' method --- .../uhighlight/DefaultPassageFormatter.java | 2 +- .../TestDefaultPassageFormatter.java | 25 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java index 27281a91be7e..51df27a5ed88 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java @@ -64,7 +64,7 @@ public String format(Passage[] passages, String content) { int pos = 0; for (Passage passage : passages) { // don't add ellipsis if its the first one, or if its connected. - if (passage.getStartOffset() > pos && pos > 0) { + if (passage.getStartOffset() != pos && pos > 0) { sb.append(ellipsis); } pos = passage.getStartOffset(); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestDefaultPassageFormatter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestDefaultPassageFormatter.java index b59fea47453e..cc5238dec5d9 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestDefaultPassageFormatter.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestDefaultPassageFormatter.java @@ -75,4 +75,29 @@ public void testOverlappingPassages() throws Exception { "Yin yang loooooooooong, yin gap yang yong", formatter.format(passages, content)); } + + public void testReversedStartOffsetOrder() { + String content = "When indexing data in Solr, each document is composed of various fields. " + + "A document essentially represents a single record, and each document typically contains a unique ID field."; + + Passage[] passages = new Passage[2]; + passages[0] = new Passage(); + passages[0].setStartOffset(73); + passages[0].setEndOffset(179); + passages[0].setScore(1.8846991f); + passages[0].addMatch(75, 83, new BytesRef("document"), 1); + passages[0].addMatch(133, 141, new BytesRef("document"), 1); + + passages[1] = new Passage(); + passages[1].setStartOffset(0); + passages[1].setEndOffset(73); + passages[1].setScore(1.5923802f); + passages[1].addMatch(33, 41, new BytesRef("document"), 1); + + DefaultPassageFormatter formatter = new DefaultPassageFormatter("", "", "\n ", false); + assertEquals( + "A document essentially represents a single record, and each document typically contains a unique ID field.\n" + + " When indexing data in Solr, each document is composed of various fields. ", + formatter.format(passages, content)); + } } From 586d5f0072b206b69a42c095a6cd5146d0fa55d0 Mon Sep 17 00:00:00 2001 From: Seunghan-Jung Date: Sat, 28 Sep 2024 23:50:22 +0900 Subject: [PATCH 2/6] tidy applied --- .../uhighlight/TestDefaultPassageFormatter.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestDefaultPassageFormatter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestDefaultPassageFormatter.java index cc5238dec5d9..299f1c2fc60c 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestDefaultPassageFormatter.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestDefaultPassageFormatter.java @@ -77,8 +77,9 @@ public void testOverlappingPassages() throws Exception { } public void testReversedStartOffsetOrder() { - String content = "When indexing data in Solr, each document is composed of various fields. " + - "A document essentially represents a single record, and each document typically contains a unique ID field."; + String content = + "When indexing data in Solr, each document is composed of various fields. " + + "A document essentially represents a single record, and each document typically contains a unique ID field."; Passage[] passages = new Passage[2]; passages[0] = new Passage(); @@ -96,8 +97,8 @@ public void testReversedStartOffsetOrder() { DefaultPassageFormatter formatter = new DefaultPassageFormatter("", "", "\n ", false); assertEquals( - "A document essentially represents a single record, and each document typically contains a unique ID field.\n" + - " When indexing data in Solr, each document is composed of various fields. ", - formatter.format(passages, content)); + "A document essentially represents a single record, and each document typically contains a unique ID field.\n" + + " When indexing data in Solr, each document is composed of various fields. ", + formatter.format(passages, content)); } } From db47d0dd1ddc17cd5fc74ef93a968dcc1f0b31de Mon Sep 17 00:00:00 2001 From: Seunghan-Jung Date: Tue, 1 Oct 2024 14:12:42 +0900 Subject: [PATCH 3/6] Modify if statement in 'DefaultPassageFormatter.format' Co-authored-by: David Smiley --- .../lucene/search/uhighlight/DefaultPassageFormatter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java index 51df27a5ed88..4cd2b07fc1ca 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/DefaultPassageFormatter.java @@ -64,7 +64,7 @@ public String format(Passage[] passages, String content) { int pos = 0; for (Passage passage : passages) { // don't add ellipsis if its the first one, or if its connected. - if (passage.getStartOffset() != pos && pos > 0) { + if (!sb.isEmpty() && passage.getStartOffset() != pos) { sb.append(ellipsis); } pos = passage.getStartOffset(); From 8b1d0c5ecd2851dafd7d2a82308a40242c7e58e0 Mon Sep 17 00:00:00 2001 From: Seunghan-Jung Date: Thu, 3 Oct 2024 04:49:14 +0900 Subject: [PATCH 4/6] Modify TestDefaultPassageFormatter.testReversedStartOffsetOrder --- .../lucene/search/uhighlight/TestDefaultPassageFormatter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestDefaultPassageFormatter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestDefaultPassageFormatter.java index 299f1c2fc60c..617077c987c4 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestDefaultPassageFormatter.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestDefaultPassageFormatter.java @@ -95,10 +95,10 @@ public void testReversedStartOffsetOrder() { passages[1].setScore(1.5923802f); passages[1].addMatch(33, 41, new BytesRef("document"), 1); - DefaultPassageFormatter formatter = new DefaultPassageFormatter("", "", "\n ", false); + DefaultPassageFormatter formatter = new DefaultPassageFormatter("", "", "\n", false); assertEquals( "A document essentially represents a single record, and each document typically contains a unique ID field.\n" - + " When indexing data in Solr, each document is composed of various fields. ", + + "When indexing data in Solr, each document is composed of various fields. ", formatter.format(passages, content)); } } From 2b2e8a8282489e178b4661d56678172c9f1389bb Mon Sep 17 00:00:00 2001 From: Seunghan-Jung Date: Thu, 3 Oct 2024 04:50:41 +0900 Subject: [PATCH 5/6] Modify CHANGES.txt --- lucene/CHANGES.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 99925865bba6..f6b667553d74 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -49,7 +49,7 @@ Optimizations Bug Fixes --------------------- -(No changes) + Other --------------------- @@ -270,6 +270,9 @@ Bug Fixes * GITHUB#12878: Fix the declared Exceptions of Expression#evaluate() to match those of DoubleValues#doubleValue(). (Uwe Schindler) +* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended + when they were not sorted by startOffset. (Seunghan Jung) + Changes in Runtime Behavior --------------------- From 348591f99858d25a87a004ed60c351e72976ec28 Mon Sep 17 00:00:00 2001 From: Seunghan-Jung Date: Thu, 3 Oct 2024 04:59:23 +0900 Subject: [PATCH 6/6] Reverted accidental changes in CHANGES.txt --- lucene/CHANGES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f6b667553d74..db61c5eb6c62 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -49,7 +49,7 @@ Optimizations Bug Fixes --------------------- - +(No changes) Other ---------------------