From 82b08045fd3c2348978fd6ba79b881ac0b3d8a34 Mon Sep 17 00:00:00 2001
From: Jim Ferenczi
Date: Thu, 3 Oct 2024 12:21:43 +0100
Subject: [PATCH] Prevent flattening of ordered and unordered interval sources (#13819)

This commit removes the flattening of ordered and unordered interval sources, as it alters the gap visibility for parent intervals.
For example, ordered("a", ordered("b", "c")) should result in a different gap compared to ordered("a", "b", "c").
Phrase/Block operators will continue to flatten their sub-sources since this does not affect the inner gap (which is always 0 in the case of blocks).
---
 .../intervals/BlockIntervalsSource.java       |  1 +
 .../intervals/OrderedIntervalsSource.java     | 14 +----------
 .../intervals/UnorderedIntervalsSource.java   | 14 +----------
 .../queries/intervals/TestIntervalQuery.java  | 24 +++++++++++++++++++
 4 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/lucene/queries/src/java/org/apache/lucene/queries/intervals/BlockIntervalsSource.java b/lucene/queries/src/java/org/apache/lucene/queries/intervals/BlockIntervalsSource.java
index 91a5e94d5f2b..efb03b154ef3 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/intervals/BlockIntervalsSource.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/intervals/BlockIntervalsSource.java
@@ -38,6 +38,7 @@ private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
     List<IntervalsSource> flattened = new ArrayList<>();
     for (IntervalsSource s : sources) {
       if (s instanceof BlockIntervalsSource) {
+        // Block sources can be flattened because they do not increase the gap (gap = 0)
         flattened.addAll(((BlockIntervalsSource) s).subSources);
       } else {
         flattened.add(s);
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/intervals/OrderedIntervalsSource.java b/lucene/queries/src/java/org/apache/lucene/queries/intervals/OrderedIntervalsSource.java
index 65fa6d033957..a89df974b662 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/intervals/OrderedIntervalsSource.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/intervals/OrderedIntervalsSource.java
@@ -30,25 +30,13 @@ static IntervalsSource build(List<IntervalsSource> sources) {
     if (sources.size() == 1) {
       return sources.get(0);
     }
-    List<IntervalsSource> rewritten = deduplicate(flatten(sources));
+    List<IntervalsSource> rewritten = deduplicate(sources);
     if (rewritten.size() == 1) {
       return rewritten.get(0);
     }
     return new OrderedIntervalsSource(rewritten);
   }
 
-  private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
-    List<IntervalsSource> flattened = new ArrayList<>();
-    for (IntervalsSource s : sources) {
-      if (s instanceof OrderedIntervalsSource) {
-        flattened.addAll(((OrderedIntervalsSource) s).subSources);
-      } else {
-        flattened.add(s);
-      }
-    }
-    return flattened;
-  }
-
   private static List<IntervalsSource> deduplicate(List<IntervalsSource> sources) {
     List<IntervalsSource> deduplicated = new ArrayList<>();
     List<IntervalsSource> current = new ArrayList<>();
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/intervals/UnorderedIntervalsSource.java b/lucene/queries/src/java/org/apache/lucene/queries/intervals/UnorderedIntervalsSource.java
index 132ab4b3976e..d2c708b4feed 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/intervals/UnorderedIntervalsSource.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/intervals/UnorderedIntervalsSource.java
@@ -33,7 +33,7 @@ static IntervalsSource build(List<IntervalsSource> sources) {
     if (sources.size() == 1) {
       return sources.get(0);
     }
-    List<IntervalsSource> rewritten = deduplicate(flatten(sources));
+    List<IntervalsSource> rewritten = deduplicate(sources);
     if (rewritten.size() == 1) {
       return rewritten.get(0);
     }
@@ -55,18 +55,6 @@ private static List<IntervalsSource> deduplicate(List<IntervalsSource> sources)
     return deduplicated;
   }
 
-  private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
-    List<IntervalsSource> flattened = new ArrayList<>();
-    for (IntervalsSource s : sources) {
-      if (s instanceof UnorderedIntervalsSource) {
-        flattened.addAll(((UnorderedIntervalsSource) s).subSources);
-      } else {
-        flattened.add(s);
-      }
-    }
-    return flattened;
-  }
-
   private UnorderedIntervalsSource(List<IntervalsSource> sources) {
     super(sources);
   }
diff --git a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervalQuery.java b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervalQuery.java
index af2ec0230c23..a0539a0f8520 100644
--- a/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervalQuery.java
+++ b/lucene/queries/src/test/org/apache/lucene/queries/intervals/TestIntervalQuery.java
@@ -338,6 +338,18 @@ public void testNestedOrInUnorderedMaxGaps() throws IOException {
     checkHits(q, new int[] {6, 7});
   }
 
+  public void testUnorderedWithNoGap() throws IOException {
+    Query q =
+        new IntervalQuery(
+            field,
+            Intervals.maxgaps(
+                0,
+                Intervals.unordered(
+                    Intervals.term("w3"),
+                    Intervals.unordered(Intervals.term("w1"), Intervals.term("w5")))));
+    checkHits(q, new int[] {0});
+  }
+
   public void testOrderedWithGaps() throws IOException {
     Query q =
         new IntervalQuery(
@@ -360,6 +372,18 @@ public void testOrderedWithGaps2() throws IOException {
     checkHits(q, new int[] {12});
   }
 
+  public void testOrderedWithNoGap() throws IOException {
+    Query q =
+        new IntervalQuery(
+            field,
+            Intervals.maxgaps(
+                0,
+                Intervals.ordered(
+                    Intervals.ordered(Intervals.term("w1"), Intervals.term("w4")),
+                    Intervals.term("w5"))));
+    checkHits(q, new int[] {0});
+  }
+
   public void testNestedOrInContainedBy() throws IOException {
     Query q =
         new IntervalQuery(