From 903fbd8e96330aa0295f45b2abd46c75288bd313 Mon Sep 17 00:00:00 2001 From: Rob Marrowstone Date: Fri, 23 Aug 2024 11:48:45 -0700 Subject: [PATCH] Add a more efficient PathExtractor Implementation The existing implementation loops through every potential search path at each step of matching. And therefore has exponential time complexity as the number of extracted fields grows. This change adds a more efficient PathExtractor implementation that supports a narrower set of SearchPaths and combinations thereof. But avoids the exponential time cost. In the benchmark testing on my machine, I found it to be significantly faster for the "full" extraction and slightly faster for the "partial" extraction. With the throughput score from the benchmarking about 6.5:3.8 and 48:42. I also found it to be significantly faster (roughly 3:5) for extracting most of the fields out of binary Ion dataset of about 20k records, each with about 15 distinct elements, some nested. It is built with a key assumption that does not hold for the existing implementation: that users may not register paths that would result in a single element matching more than one terminal matcher. I searched for Amazon internal usages and believe that the supported usage patterns cover any and all usage I could find. I found no usage of Annotations not on the Top-Level-Values themselves. --- .../amazon/ionpathextraction/FsmMatcher.java | 27 ++ .../ionpathextraction/FsmMatcherBuilder.java | 276 ++++++++++++++++++ .../ionpathextraction/FsmPathExtractor.java | 134 +++++++++ .../PathExtractorBuilder.java | 72 ++++- .../amazon/ionpathextraction/SearchPath.java | 30 ++ .../UnsupportedPathExpression.java | 9 + .../internal/Annotations.java | 29 +- .../internal/PathExtractorConfig.java | 8 +- .../pathcomponents/Index.java | 9 + .../pathcomponents/PathComponent.java | 10 +- .../pathcomponents/Text.java | 11 +- .../pathcomponents/Wildcard.java | 11 +- .../ionpathextraction/PathExtractorTest.kt | 39 ++- 13 files changed, 656 insertions(+), 9 deletions(-) create mode 100644 src/main/java/com/amazon/ionpathextraction/FsmMatcher.java create mode 100644 src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java create mode 100644 src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java create mode 100644 src/main/java/com/amazon/ionpathextraction/UnsupportedPathExpression.java diff --git a/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java b/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java new file mode 100644 index 0000000..299bb52 --- /dev/null +++ b/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java @@ -0,0 +1,27 @@ +package com.amazon.ionpathextraction; + +import com.amazon.ion.IonReader; + +import java.util.function.BiFunction; + +/** + * Base class for match states in the Finite-State-Machine matching implementation. + */ +abstract class FsmMatcher +{ + /** + * Callback for match state. May be null. + */ + BiFunction callback; + + /** + * Indicates there are no possible child transitions. + */ + boolean terminal = false; + + /** + * Return the child matcher for the given reader context. + * Return null if there is no match. + */ + abstract FsmMatcher transition(String fieldName, Integer position, String[] annotations); +} diff --git a/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java b/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java new file mode 100644 index 0000000..8bbfa96 --- /dev/null +++ b/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java @@ -0,0 +1,276 @@ +package com.amazon.ionpathextraction; + +import com.amazon.ion.IonReader; +import com.amazon.ionpathextraction.internal.Annotations; +import com.amazon.ionpathextraction.pathcomponents.Index; +import com.amazon.ionpathextraction.pathcomponents.PathComponent; +import com.amazon.ionpathextraction.pathcomponents.Text; +import com.amazon.ionpathextraction.pathcomponents.Wildcard; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.BiFunction; +import java.util.stream.Collectors; + +/** + * Builds a root FsmMatcher for a set of SearchPaths. + *
+ * One key principle in the implementation is to close over as much branching as possible at build time. + * For example: for a case-insensitive field lookup, lower case the field names once, at build time. + *
+ * The second key principle is that there should be at-most-one Matcher states for a given reader context. + * So any combination of different paths which could both be active for the same reader state are disallowed. + * For example: allowing a mix of field names and ordinal positions for a given sub-path. + *
+ * Beyond that, there are some usage patterns which could be included, such as annotations filtering on + * field names or ordinals, but for which there was no observed usage. + */ +class FsmMatcherBuilder +{ + private final PathTreeNode root = new PathTreeNode(); + private final boolean caseInsensitiveAll; + private final boolean caseInsensitiveFields; + + FsmMatcherBuilder(boolean caseInsensitiveAll, boolean caseInsensitiveFields) { + this.caseInsensitiveAll = caseInsensitiveAll; + this.caseInsensitiveFields = caseInsensitiveFields; + } + + /** + * Incorporate the searchPath into the matcher tree to be built. + * + * @throws UnsupportedPathExpression if the SearchPath is not supported. + */ + void accept(SearchPath searchPath) { + List steps = searchPath.getNormalizedPath(); + PathTreeNode currentNode = root; + for (PathComponent step : steps) { + currentNode = currentNode.acceptStep(step); + } + currentNode.setCallback(searchPath.getCallback()); + } + + /** + * Build the FsmMatcher for the set of paths. + * + * @throws UnsupportedPathExpression if the combination of SearchPaths is not supported. + */ + FsmMatcher build() { + return root.buildMatcher(); + } + + /** + * Mutable builder node to model the path tree before building into a FsmMatcher. + */ + private class PathTreeNode { + BiFunction callback; + PathTreeNode wildcard; + Map annotatedSplats = new HashMap<>(); + Map fields = new HashMap<>(); + Map indexes = new HashMap<>(); + + /** + * Find or create a new PathTreeNode for the child step. + * + * @return the new or existing node. + * @throws UnsupportedPathExpression if the step contains path components that are not supported + */ + private PathTreeNode acceptStep(PathComponent step) { + if (step.isAnnotated() && caseInsensitiveAll) { + throw new UnsupportedPathExpression( + "Case Insensitive Matching of Annotations is not supported by this matcher.\n" + + "Use the legacy matcher or set withMatchFieldNamesCaseInsensitive instead."); + } + + PathTreeNode child; + if (step instanceof Wildcard) { + if (step.isAnnotated()) { + child = annotatedSplats.computeIfAbsent(step.getAnnotations(), a -> new PathTreeNode()); + } else { + if (wildcard == null) { + wildcard = new PathTreeNode(); + } + child = wildcard; + } + } else { + if (step.isAnnotated()) { + // this is not too bad to do, but it takes care to do without impacting the non-annotated case + // which is the majority of usage. one would also want to mind the principle to avoid multiple + // distinct match paths for a given reader context and only allow either annotated or not + // for a given field name or index ordinal. + throw new UnsupportedPathExpression("Annotations are only supported on wildcards!"); + } + + if (step instanceof Text) { + String fieldName = caseInsensitiveFields + ? ((Text) step).getFieldName().toLowerCase() + : ((Text) step).getFieldName(); + child = fields.computeIfAbsent(fieldName, f -> new PathTreeNode()); + } else if (step instanceof Index) { + child = indexes.computeIfAbsent(((Index) step).getOrdinal(), i -> new PathTreeNode()); + } else { + throw new IllegalArgumentException("step of unknown runtime type: " + step.getClass()); + } + } + return child; + } + + private void setCallback(BiFunction callback) { + if (this.callback == null) { + this.callback = callback; + } else { + // this would actually be pretty simple to do: just create a ComposedCallback of BiFunctions. + throw new UnsupportedPathExpression("Callback already set!"); + } + } + + private FsmMatcher buildMatcher() { + List> matchers = new ArrayList<>(); + if (wildcard != null) { + matchers.add(new SplatMatcher<>(wildcard.buildMatcher(), callback)); + } + if (!annotatedSplats.isEmpty()) { + List> children = new ArrayList<>(annotatedSplats.size()); + List annotations = new ArrayList<>(annotatedSplats.size()); + for (Map.Entry entry : annotatedSplats.entrySet()) { + children.add(entry.getValue().buildMatcher()); + annotations.add(entry.getKey().getAnnotations()); + } + matchers.add(new AnnotationsMatcher<>(annotations, children)); + } + if (!fields.isEmpty()) { + Map> children = fields.entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, (e) -> e.getValue().buildMatcher())); + FsmMatcher fieldMatcher = caseInsensitiveFields + ? new CaseInsensitiveFieldMatcher<>(children, callback) + : new FieldMatcher<>(children, callback); + matchers.add(fieldMatcher); + } + if (!indexes.isEmpty()) { + Map> children = indexes.entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, (e) -> e.getValue().buildMatcher())); + matchers.add(new IndexMatcher<>(children, callback)); + } + + if (matchers.isEmpty()) { + return new TerminalMatcher<>(callback); + } else if (matchers.size() == 1) { + return matchers.get(0); + } else { + // the main issue with allowing more than one is that it means that any given match context + // may produce multiple matches, and search path writers may become reliant on the order + // in which callbacks for such cases are called. And in the general case, that might mean + // a crazy mix between the different types of matching, which devolves to the for-each loop + // we see in the PathExtractorImpl. + // That seems like a lot of complexity for a usage pattern of questionable value. + // So if you're reading this, and you think "oh this is a silly restriction", then take + // the time to understand why it's important to the path writer and reconsider accordingly. + throw new UnsupportedPathExpression("Only one variant of wildcard, annotated wildcard, field names, or index ordinals is supported!"); + } + } + } + + private static class SplatMatcher extends FsmMatcher + { + FsmMatcher child; + + SplatMatcher( + FsmMatcher child, + BiFunction callback) { + this.child = child; + this.callback = callback; + } + + @Override + FsmMatcher transition(String fieldName, Integer position, String[] annotations) + { + return child; + } + } + + private static class FieldMatcher extends FsmMatcher + { + Map> fields; + + FieldMatcher( + Map> fields, + BiFunction callback) { + this.fields = fields; + this.callback = callback; + } + + @Override + FsmMatcher transition(String fieldName, Integer position, String[] annotations) + { + return fields.get(fieldName); + } + } + + private static class CaseInsensitiveFieldMatcher extends FieldMatcher { + CaseInsensitiveFieldMatcher( + Map> fields, + BiFunction callback) { + super(fields, callback); + } + + @Override + FsmMatcher transition(String fieldName, Integer position, String[] annotations) + { + return fields.get(fieldName.toLowerCase()); + } + } + + private static class IndexMatcher extends FsmMatcher + { + Map> indexes; + + IndexMatcher( + Map> indexes, + BiFunction callback) { + this.indexes = indexes; + this.callback = callback; + } + + @Override + FsmMatcher transition(String fieldName, Integer position, String[] annotations) { + return indexes.get(position); + } + } + + private static class TerminalMatcher extends FsmMatcher + { + TerminalMatcher(BiFunction callback) { + this.callback = callback; + this.terminal = true; + } + + @Override + FsmMatcher transition(String fieldName, Integer position, String[] annotations) { + return null; + } + } + + private static class AnnotationsMatcher extends FsmMatcher + { + List candidates; + List> matchers; + + AnnotationsMatcher(List candidates, List> matchers) { + this.candidates = candidates; + this.matchers = matchers; + } + + @Override + FsmMatcher transition(String fieldName, Integer position, String[] annotations) { + for (int i = 0; i < candidates.size(); i++) { + if (Arrays.equals(candidates.get(i), annotations)) { + return matchers.get(i); + } + } + return null; + } + } +} diff --git a/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java b/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java new file mode 100644 index 0000000..8f4b03c --- /dev/null +++ b/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java @@ -0,0 +1,134 @@ +package com.amazon.ionpathextraction; + +import com.amazon.ion.IonReader; +import com.amazon.ion.IonType; +import com.amazon.ionpathextraction.internal.PathExtractorConfig; + +import java.util.List; +import java.util.function.BiFunction; + +import static com.amazon.ionpathextraction.internal.Preconditions.checkArgument; +import static com.amazon.ionpathextraction.internal.Preconditions.checkState; + +/** + * A PathExtractor modeled as a Finite State Machine. + *
+ * Compared to the PathExtractorImpl, this supports a narrower set of + * SearchPaths and their combinations, but is more performant, particularly + * when a large number of field names are searched for. + *
+ * A more comprehensive explanation of the strictness from a user PoV can be + * found on the PathExtractorBuilder.buildStrict() API method. Notes on the + * 'why' can be found in the FsmMatcherBuilder. + */ +class FsmPathExtractor implements PathExtractor { + private final FsmMatcher rootMatcher; + private final PathExtractorConfig config; + + private FsmPathExtractor(FsmMatcher rootMatcher, PathExtractorConfig config) { + this.rootMatcher = rootMatcher; + this.config = config; + } + + static FsmPathExtractor create(List> searchPaths, PathExtractorConfig config) { + FsmMatcherBuilder builder = new FsmMatcherBuilder<>(config.isMatchCaseInsensitive(), config.isMatchFieldsCaseInsensitive()); + for (SearchPath path : searchPaths) { + builder.accept(path); + } + + return new FsmPathExtractor<>(builder.build(), config); + } + + @Override + public void match(IonReader reader) { + match(reader, null); + } + + @Override + public void match(IonReader reader, T context) { + checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(), + "reader must be at depth zero, it was at: " + reader.getDepth()); + + while (reader.next() != null) { + matchCurrentValue(reader, context); + } + } + + @Override + public void matchCurrentValue(IonReader reader) { + matchCurrentValue(reader, null); + } + + @Override + public void matchCurrentValue(IonReader reader, T context) { + checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(), + "reader must be at depth zero, it was at: " + reader.getDepth()); + checkArgument(reader.getType() != null, + "reader must be positioned at a value; call IonReader.next() first."); + + match(reader, rootMatcher, context, null, reader.getDepth()); + } + + private int match( + IonReader reader, + FsmMatcher matcher, + T context, + Integer position, + int initialDepth) { + FsmMatcher child = matcher.transition(reader.getFieldName(), position, reader.getTypeAnnotations()); + if (child == null) { + return 0; + } + + if (child.callback != null) { + int stepOut = invokeCallback(reader, child.callback, initialDepth, context); + if (stepOut > 0) { + return stepOut; + } + } + + if (IonType.isContainer(reader.getType()) && !child.terminal) { + reader.stepIn(); + int childPos = 0; + int stepOut = 0; + while (reader.next() != null && stepOut == 0) { + stepOut = match(reader, child, context, childPos++, initialDepth); + } + reader.stepOut(); + if (stepOut > 0) { + return stepOut - 1; + } + } + + return 0; + } + + private int invokeCallback( + final IonReader reader, + final BiFunction callback, + final int initialReaderDepth, + final T context) { + int previousReaderDepth = reader.getDepth(); + + int stepOutTimes = callback.apply(reader, context); + int newReaderDepth = reader.getDepth(); + + checkState(previousReaderDepth == newReaderDepth, + "Reader must be at same depth when returning from callbacks. initial: " + + previousReaderDepth + + ", new: " + + newReaderDepth); + + // we don't allow users to step out the initial reader depth + int readerRelativeDepth = reader.getDepth() - initialReaderDepth; + + checkState(stepOutTimes <= readerRelativeDepth, + "Callback return cannot be greater than the reader current relative depth." + + " return: " + + stepOutTimes + + ", relative reader depth: " + + readerRelativeDepth); + + return stepOutTimes; + } +} diff --git a/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java b/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java index bd6b597..b4b1120 100644 --- a/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java +++ b/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java @@ -35,6 +35,7 @@ public final class PathExtractorBuilder { private final List> searchPaths = new ArrayList<>(); private boolean matchRelativePaths; private boolean matchCaseInsensitive; + private boolean matchFieldsCaseInsensitive; private PathExtractorBuilder() { } @@ -48,19 +49,66 @@ public static PathExtractorBuilder standard() { PathExtractorBuilder builder = new PathExtractorBuilder<>(); builder.matchCaseInsensitive = DEFAULT_CASE_INSENSITIVE; builder.matchRelativePaths = DEFAULT_MATCH_RELATIVE_PATHS; + builder.matchFieldsCaseInsensitive = DEFAULT_CASE_INSENSITIVE; return builder; } /** * Instantiates a thread safe {@link PathExtractor} configured by this builder. + * Attempts to build a "strict" PathExtractor which is much more performant, particularly + * for extractions with many field names. Falls back to the "legacy" implementation. * + * @see this.buildStrict() to ensure the more optimal implementation is used. * @return new {@link PathExtractor} instance. */ public PathExtractor build() { - return new PathExtractorImpl<>(searchPaths, new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive)); + try { + return buildStrict(); + } catch (UnsupportedPathExpression e) { + return buildLegacy(); + } } + /** + * Instantiate a "stricter" and more optimized PathExtractor. + *
+ * Supports search paths where there is only one "variant" of step + * type from each parent step, and only one callback per state. + * Annotations matching is only supported on the root or wildcards. + * Case insensitivity is supported on field names, not annotations. + *
+ * Examples of supported paths (and any combination of the below): + * `A::()` + * `(foo bar)` + * `(foo qux)` + * `(spam 0)` + * `(spam 1)` + * `(quid * quo)` + * `(lorem A::B::* ipsum)` + *
+ * Examples of unsupported paths: + * `(a::foo)` annotations on field names not supported, yet. + * `(a::1)` annotations on index ordinals not supported, yet. + * `(foo bar) (foo 1) (foo *)` combination of field names, index ordinals or wildcards not supported. + * `a::() ()` combination of annotated and non-annotated root (or other wildcard) matching. + * + * @throws UnsupportedPathExpression if any search path or the paths combined, are not supported. + * @return new {@link PathExtractor} instance. + */ + public PathExtractor buildStrict() { + return FsmPathExtractor.create(searchPaths, new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive, matchFieldsCaseInsensitive)); + } + + /** + * Instantiate a "legacy" PathExtractor implementation. + * The returned PathExtractor is inefficient when a large number of field names is searched, + * but a wider variety of search paths are supported. + */ + public PathExtractor buildLegacy() { + return new PathExtractorImpl<>(searchPaths, new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive, matchFieldsCaseInsensitive)); + } + /** * Sets matchRelativePaths config. When true the path extractor will accept readers at any depth, when false the * reader must be at depth zero. @@ -78,8 +126,9 @@ public PathExtractorBuilder withMatchRelativePaths(final boolean matchRelativ } /** - * Sets matchCaseInsensitive config. When true the path extractor will match fields ignoring case, when false the - * path extractor will mach respecting the path components case. + * Sets matchCaseInsensitive config. When true the path extractor will match fields _and annotations_ ignoring case. + * When false the path extractor will match respecting the path components case. + * To set case insensitivity for _only field names_ use the `withMatchFieldNamesCaseInsensitive` builder. * *
* defaults to false. @@ -89,6 +138,23 @@ public PathExtractorBuilder withMatchRelativePaths(final boolean matchRelativ */ public PathExtractorBuilder withMatchCaseInsensitive(final boolean matchCaseInsensitive) { this.matchCaseInsensitive = matchCaseInsensitive; + this.matchFieldsCaseInsensitive = matchCaseInsensitive; + + return this; + } + + /** + * Sets matchFieldNamesCaseInsensitive config. When true the path extractor will match field names ignoring case. + * For example: 'Foo' will match 'foo'. + * + *
+ * defaults to false. + * + * @param matchCaseInsensitive new config value. + * @return builder for chaining. + */ + public PathExtractorBuilder withMatchFieldNamesCaseInsensitive(final boolean matchCaseInsensitive) { + this.matchFieldsCaseInsensitive = matchCaseInsensitive; return this; } diff --git a/src/main/java/com/amazon/ionpathextraction/SearchPath.java b/src/main/java/com/amazon/ionpathextraction/SearchPath.java index 5e57927..1a8859c 100644 --- a/src/main/java/com/amazon/ionpathextraction/SearchPath.java +++ b/src/main/java/com/amazon/ionpathextraction/SearchPath.java @@ -17,6 +17,9 @@ import com.amazon.ionpathextraction.internal.Annotations; import com.amazon.ionpathextraction.internal.MatchContext; import com.amazon.ionpathextraction.pathcomponents.PathComponent; +import com.amazon.ionpathextraction.pathcomponents.Wildcard; + +import java.util.ArrayList; import java.util.List; import java.util.function.BiFunction; @@ -46,6 +49,19 @@ int size() { return pathComponents.size(); } + /** + * Produces a "normalized" path for the SearchPath. + * Basically: the SearchPath has the annotations (or not) for matching top-level-values. + * The "normalized" path treats this as an explicit Wildcard step and adds it to the head + * of the PathComponents. + */ + List getNormalizedPath() { + List normalizedPath = new ArrayList<>(pathComponents.size() + 1); + normalizedPath.add(new Wildcard(annotations)); + normalizedPath.addAll(pathComponents); + return normalizedPath; + } + /** * Callback to be invoked when the Search Path is matched. */ @@ -67,4 +83,18 @@ boolean partialMatchAt(final MatchContext context) { return false; } + + public String toString() { + StringBuilder builder = new StringBuilder(); + + // todo: annotations! + builder.append("("); + for (PathComponent pathComponent : pathComponents) { + builder.append(pathComponent.toString()); + builder.append(" "); + } + builder.append(")"); + + return builder.toString(); + } } \ No newline at end of file diff --git a/src/main/java/com/amazon/ionpathextraction/UnsupportedPathExpression.java b/src/main/java/com/amazon/ionpathextraction/UnsupportedPathExpression.java new file mode 100644 index 0000000..2af1ae9 --- /dev/null +++ b/src/main/java/com/amazon/ionpathextraction/UnsupportedPathExpression.java @@ -0,0 +1,9 @@ +package com.amazon.ionpathextraction; + +public class UnsupportedPathExpression extends RuntimeException +{ + public UnsupportedPathExpression(String msg) + { + super(msg); + } +} diff --git a/src/main/java/com/amazon/ionpathextraction/internal/Annotations.java b/src/main/java/com/amazon/ionpathextraction/internal/Annotations.java index df3e823..1ea737d 100644 --- a/src/main/java/com/amazon/ionpathextraction/internal/Annotations.java +++ b/src/main/java/com/amazon/ionpathextraction/internal/Annotations.java @@ -13,6 +13,7 @@ package com.amazon.ionpathextraction.internal; +import java.util.Arrays; import java.util.stream.IntStream; /** @@ -24,7 +25,9 @@ * Internal only. Not intended for application use. *

*/ -public class Annotations { +public final class Annotations { + + public static final Annotations EMPTY = new Annotations(new String[] {}); final String[] rawAnnotations; @@ -35,6 +38,14 @@ public Annotations(final String[] rawAnnotations) { this.rawAnnotations = rawAnnotations; } + public String[] getAnnotations() { + return rawAnnotations; + } + + public boolean hasAnnotations() { + return rawAnnotations.length > 0; + } + /** * returns true if it matches on the annotations provided. */ @@ -51,4 +62,20 @@ private static boolean arrayEquals(final String[] left, final String[] right, fi return IntStream.range(0, left.length) .allMatch(i -> ignoreCase ? left[i].equalsIgnoreCase(right[i]) : left[i].equals(right[i])); } + + @Override + public boolean equals(final Object o) { + if (this == o) { + return true; + } + if (!(o instanceof Annotations)) { + return false; + } + return Arrays.equals(rawAnnotations, ((Annotations) o).rawAnnotations); + } + + @Override + public int hashCode() { + return Arrays.hashCode(rawAnnotations); + } } diff --git a/src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java b/src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java index df8951c..c3c3966 100644 --- a/src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java +++ b/src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java @@ -20,10 +20,12 @@ public final class PathExtractorConfig { private final boolean matchRelativePaths; private final boolean matchCaseInsensitive; + private final boolean matchFieldsCaseInsensitive; - public PathExtractorConfig(final boolean matchRelativePaths, final boolean matchCaseInsensitive) { + public PathExtractorConfig(final boolean matchRelativePaths, final boolean matchCaseInsensitive, boolean matchFieldsCaseInsensitive) { this.matchRelativePaths = matchRelativePaths; this.matchCaseInsensitive = matchCaseInsensitive; + this.matchFieldsCaseInsensitive = matchFieldsCaseInsensitive; } public boolean isMatchRelativePaths() { @@ -33,4 +35,8 @@ public boolean isMatchRelativePaths() { public boolean isMatchCaseInsensitive() { return matchCaseInsensitive; } + + public boolean isMatchFieldsCaseInsensitive() { + return matchFieldsCaseInsensitive; + } } diff --git a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java index 21c5d09..ce0f9e8 100644 --- a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java +++ b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java @@ -41,8 +41,17 @@ public Index(final int ordinal, final String[] annotations) { this.ordinal = ordinal; } + public Integer getOrdinal() { + return ordinal; + } + @Override public boolean innerMatches(final MatchContext context) { return ordinal == context.getReaderContainerIndex(); } + + @Override + public String toString() { + return String.valueOf(ordinal); + } } diff --git a/src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java b/src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java index 3f88a28..ce7af7e 100644 --- a/src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java +++ b/src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java @@ -29,7 +29,7 @@ */ public abstract class PathComponent { - private final Annotations annotations; + protected final Annotations annotations; PathComponent(final Annotations annotations) { checkArgument(annotations != null, "fieldName cannot be null"); @@ -37,6 +37,14 @@ public abstract class PathComponent { this.annotations = annotations; } + public Annotations getAnnotations() { + return annotations; + } + + public boolean isAnnotated() { + return annotations.hasAnnotations(); + } + /** * Checks if this component matches the current reader position with the given configuration. * diff --git a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java index e597af5..739eabc 100644 --- a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java +++ b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java @@ -46,6 +46,10 @@ public Text(final String fieldName, final String[] annotations) { this.fieldName = fieldName; } + public String getFieldName() { + return fieldName; + } + @Override public boolean innerMatches(final MatchContext context) { final IonReader reader = context.getReader(); @@ -53,8 +57,13 @@ public boolean innerMatches(final MatchContext context) { return false; } - return context.getConfig().isMatchCaseInsensitive() + return context.getConfig().isMatchFieldsCaseInsensitive() ? fieldName.equalsIgnoreCase(reader.getFieldName()) : fieldName.equals(reader.getFieldName()); } + + @Override + public String toString() { + return fieldName; + } } diff --git a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java index c5d0c0a..0c541ea 100644 --- a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java +++ b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java @@ -32,11 +32,20 @@ public final class Wildcard extends PathComponent { public static final String TEXT = "*"; public Wildcard(final String[] annotations) { - super(new Annotations(annotations)); + this(new Annotations(annotations)); + } + + public Wildcard(final Annotations annotations) { + super(annotations); } @Override public boolean innerMatches(final MatchContext context) { return true; } + + @Override + public String toString() { + return TEXT; + } } diff --git a/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt b/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt index 6394e05..4d027a2 100644 --- a/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt +++ b/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt @@ -220,7 +220,7 @@ class PathExtractorTest { reader.stepIn() val exception = assertThrows { api.match(extractor, reader) } - assertEquals("reader must be at depth zero, it was at:1", exception.message) + assertEquals("reader must be at depth zero, it was at: 1", exception.message) } @ParameterizedTest @@ -259,6 +259,43 @@ class PathExtractorTest { } } + @ParameterizedTest + @EnumSource(API::class) + fun caseInsensitiveWithSimilarFieldNames(api: API) { + val extractor = PathExtractorBuilder.standard() + .withMatchCaseInsensitive(true) + .withSearchPath("(foo)", collectToIonList(0)) + .withSearchPath("(Foo)", collectToIonList(0)) + .build() + + val out = ION.newEmptyList() + api.match(extractor, ION.newReader("{FOO: 1, foO: 2}{foo: 3}{fOo: 4}{bar: 5}"), out) + + if (api == API.MATCH_CURRENT_VALUE) { + assertEquals(ION.singleValue("[1,1,2,2]"), out) + } else { + assertEquals(ION.singleValue("[1,1,2,2,3,3,4,4]"), out) + } + } + + @ParameterizedTest + @EnumSource(API::class) + fun caseInsensitiveAnnotations(api: API) { + val extractor = PathExtractorBuilder.standard() + .withMatchCaseInsensitive(true) + .withSearchPath("A::()", collectToIonList(0)) + .build() + + val out = ION.newEmptyList() + api.match(extractor, ION.newReader("a::17 b::31 A::51"), out) + + if (api == API.MATCH_CURRENT_VALUE) { + assertEquals(ION.singleValue("[a::17]"), out) + } else { + assertEquals(ION.singleValue("[a::17, A::51]"), out) + } + } + @ParameterizedTest @EnumSource(API::class) fun stepOutMoreThanPermitted(api: API) {