diff --git a/docs/changelog.md b/docs/changelog.md index b527f20a025..17fa9db6e27 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -256,6 +256,32 @@ Following, classes are removed and have to be replaced by tinkerpop equivalent: | `org.janusgraph.channelizers.JanusGraphNioChannelizer` | `org.apache.tinkerpop.gremlin.server.channel.NioChannelizer` | | `org.janusgraph.channelizers.JanusGraphWsAndHttpChannelizer` | `org.apache.tinkerpop.gremlin.server.channel.WsAndHttpChannelizer` | +##### Breaking change Lucene and Solr fuzzy predicates + +The text predicates `text.textFuzzy` and `text.textContainsFuzzy` have been updated in both the Lucene and Solr indexing +backends to align with JanusGraph and Elastic. These predicates now inspect the query length to determine the Levenshtein +distance, where previously they used the backend's default max distance of 2: + +- 0 for strings of one or two characters (exact match) +- 1 for strings of three, four or five characters +- 2 for strings of more than five characters + +**Change Matrix:** + +| text | query | previous result | new result | +| --- | --- | --- | --- | +| ah | ah | true | true | +| ah | ai | true | **false** | +| hop | hop | true | true | +| hop | hap | true | true | +| hop | hoop | true | true | +| hop | hooop | true | **false** | +| surprises | surprises | true | true | +| surprises | surprizes | true | true | +| surprises | surpprises | true | true | +| surprises | surpprisess | false | false | + + ### Version 0.5.3 (Release Date: December 24, 2020) === "Maven" diff --git a/docs/index-backend/text-search.md b/docs/index-backend/text-search.md index 3cf82218aab..f277ec75604 100644 --- a/docs/index-backend/text-search.md +++ b/docs/index-backend/text-search.md @@ -68,6 +68,15 @@ g.V().has('booksummary', textContainsRegex('.*corn.*')) g.V().has('booksummary', textContainsFuzzy('unicorn')) ``` +The Elasticsearch backend extends this functionality and includes support for negations +of the above 
predicates, as well as phrase matching: + +- `textNotContains`: is true if no words inside the text string match the query string +- `textNotContainsPrefix`: is true if no words inside the text string begin with the query string +- `textNotContainsRegex`: is true if no words inside the text string match the given regular expression +- `textNotContainsFuzzy`: is true if no words inside the text string are similar to the query string (based on Levenshtein edit distance) +- `textNotContainsPhrase`: is true if the text string does not contain the sequence of words in the query string + String search predicates (see below) may be used in queries, but those require filtering in memory which can be very costly. @@ -111,6 +120,13 @@ g.V().has('bookname', textRegex('.*corn.*')) g.V().has('bookname', textFuzzy('unicorn')) ``` +The Elasticsearch backend extends this functionality and includes support for negations +of the above text predicates: + +- `textNotPrefix`: if the string value does not start with the given query string +- `textNotRegex`: if the string value does not match the given regular expression in its entirety +- `textNotFuzzy`: if the string value is not similar to the given query string (based on Levenshtein edit distance) + Full-text search predicates may be used in queries, but those require filtering in memory which can be very costly. diff --git a/docs/interactions/search-predicates.md b/docs/interactions/search-predicates.md index ffefaf3497f..3b93c441ece 100644 --- a/docs/interactions/search-predicates.md +++ b/docs/interactions/search-predicates.md @@ -27,13 +27,22 @@ The `Text` enum specifies the [Text Search](../index-backend/text-search.md) use * Text search predicates which match against the individual words inside a text string after it has been tokenized. These predicates are not case sensitive. 
- `textContains`: is true if (at least) one word inside the text string matches the query string + - `textNotContains`: is true if no words inside the text string match the query string - `textContainsPrefix`: is true if (at least) one word inside the text string begins with the query string + - `textNotContainsPrefix`: is true if no words inside the text string begin with the query string - `textContainsRegex`: is true if (at least) one word inside the text string matches the given regular expression - - `textContainsFuzzy`: is true if (at least) one word inside the text string is similar to the query String (based on Levenshtein edit distance) + - `textNotContainsRegex`: is true if no words inside the text string match the given regular expression + - `textContainsFuzzy`: is true if (at least) one word inside the text string is similar to the query string (based on Levenshtein edit distance) + - `textNotContainsFuzzy`: is true if no words inside the text string are similar to the query string (based on Levenshtein edit distance) + - `textContainsPhrase`: is true if the text string contains the exact sequence of words in the query string + - `textNotContainsPhrase`: is true if the text string does not contain the sequence of words in the query string * String search predicates which match against the entire string value - `textPrefix`: if the string value starts with the given query string + - `textNotPrefix`: if the string value does not start with the given query string - `textRegex`: if the string value matches the given regular expression in its entirety + - `textNotRegex`: if the string value does not match the given regular expression in its entirety - `textFuzzy`: if the string value is similar to the given query string (based on Levenshtein edit distance) + - `textNotFuzzy`: if the string value is not similar to the given query string (based on Levenshtein edit distance) See [Text Search](../index-backend/text-search.md) for more information about 
full-text and string search. diff --git a/janusgraph-backend-testutils/src/main/java/org/janusgraph/core/attribute/TextArgument.java b/janusgraph-backend-testutils/src/main/java/org/janusgraph/core/attribute/TextArgument.java new file mode 100644 index 00000000000..c1444b71784 --- /dev/null +++ b/janusgraph-backend-testutils/src/main/java/org/janusgraph/core/attribute/TextArgument.java @@ -0,0 +1,474 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package org.janusgraph.core.attribute; + +import org.janusgraph.graphdb.query.JanusGraphPredicate; +import org.junit.jupiter.params.provider.Arguments; + +import java.util.Arrays; +import java.util.stream.Stream; + +import static org.junit.jupiter.params.provider.Arguments.arguments; + +/** + * Predicate test parameters + * + * @author Andrew Sheppard (andrew.sheppard@fireeye.com) + */ +public class TextArgument { + + public static final String text = "This world is full of 1funny surprises! 
A Full Yes"; + public static final String name = "fully funny"; + public static final String shortValue = "ah"; + public static final String mediumValue = "hop"; + public static final String longValue = "surprises"; + + private static Stream addPredicate(JanusGraphPredicate predicate, Stream argStream) { + return argStream.map(argList -> { + Object[] rawArgs = argList.get(); + return arguments(predicate, rawArgs[0], rawArgs[1], rawArgs[2]); + }); + } + + private static Stream negate(Stream argStream) { + return argStream.map(argList -> { + Object[] rawArgs = argList.get(); + return arguments(!((boolean) rawArgs[0]), rawArgs[1], rawArgs[2]); + }); + } + + /** + * Common arguments for Text.CONTAINS and Text.NOT_CONTAINS tests + * + * @return + */ + private static Stream textContainsCommon() { + return Arrays.stream(new Arguments[] { + arguments(true, text, "world"), + arguments(true, text, "wOrLD"), + arguments(false, text, "worl"), + + arguments(true, text, "this"), + arguments(true, text, "yes"), + arguments(false, text, "funny"), + + arguments(true, text, "surprises"), + arguments(true, text, "FULL"), + + arguments(true, text, "full surprises"), + arguments(true, text, "full,surprises,world"), + arguments(true, text, "a world"), + + arguments(false, text, "full bunny") + }); + } + + /** + * Common arguments for Text.PREFIX and Text.NOT_PREFIX tests + * + * @return + */ + private static Stream textPrefixCommon() { + return Arrays.stream(new Arguments[] { + arguments(true, name, "fully"), + arguments(true, name, "ful"), + arguments(true, name, "fully fu"), + arguments(false, name, "fun") + }); + } + + /** + * Common arguments for Text.CONTAINS_PREFIX and Text.NOT_CONTAINS_PREFIX tests + * + * @return + */ + private static Stream textContainsPrefixCommon() { + return Arrays.stream(new Arguments[] { + arguments(true, name, "fully"), + arguments(true, name, "ful"), + + arguments(true, text, "worl"), + arguments(true, text, "wORl"), + arguments(true, text, "ye"), + 
arguments(true, text, "Y"), + + arguments(false, text, "fo"), + arguments(false, text, "of 1f"), + arguments(false, text, "ses"), + + arguments(true, name, "fun"), + }); + } + + /** + * Common arguments for Text.REGEX and Text.NOT_REGEX tests + * + * @return + */ + private static Stream textRegexCommon() { + return Arrays.stream(new Arguments[] { + // trailing wildcard + arguments(true, "over", "o.*"), + arguments(true, "over", "ove.?"), + arguments(true, "over", "ove[rst]?"), + + // leading wildcard + arguments(true, "over", ".*r"), + arguments(true, "over", ".*ver"), + arguments(true, "over", ".?ver"), + arguments(true, "over", "[opr]?ver"), + + // inner wildcard + arguments(true, "over", "o.*r"), + arguments(true, "over", "o.*er"), + arguments(true, "over", "o.?er"), + arguments(true, "over", "o[ve]*r"), + arguments(true, "over", "o.+r"), + + arguments(true, name, "(fu[ln]*y) (fu[ln]*y)"), + arguments(false, name, "(fu[l]*y) (fu[l]*y)"), + arguments(true, name, "(fu[l]*y) .*") + }); + } + + /** + * Common arguments for Text.CONTAINS_REGEX and Text.NOT_CONTAINS_REGEX tests + * + * @return + */ + private static Stream textContainsRegexCommon() { + return Arrays.stream(new Arguments[] { + // trailing wildcard + arguments(true, "over", "o.*"), + arguments(true, "over", "ove.?"), + arguments(true, "over", "ove[rst]?"), + + // leading wildcard + arguments(true, "over", ".*r"), + arguments(true, "over", ".*ver"), + arguments(true, "over", ".?ver"), + arguments(true, "over", "[opr]?ver"), + + // inner wildcard + arguments(true, "over", "o.*r"), + arguments(true, "over", "o.*er"), + arguments(true, "over", "o.?er"), + arguments(true, "over", "o[ve]*r"), + arguments(true, "over", "o.+r"), + + arguments(true, text, "fu[l]+"), + arguments(true, text, "wor[ld]{1,2}"), + + arguments(false, text, "fo"), + arguments(false, text, "wor[l]+"), + arguments(false, text, "wor[ld]{3,5}") + }); + } + + /** + * Common arguments for Text.CONTAINS_PHRASE and Text.NOT_CONTAINS_PHRASE tests + 
* + * @return + */ + private static Stream textContainsPhraseCommon() { + return Arrays.stream(new Arguments[] { + arguments(true, text, "world"), + arguments(true, text, "wOrLD"), + arguments(false, text, "worl"), + + arguments(true, text, "this"), + arguments(true, text, "yes"), + arguments(false, text, "funny"), + + arguments(true, text, "surprises"), + arguments(true, text, "FULL"), + + arguments(false, text, "full surprises"), + arguments(false, text, "full,surprises,world"), + + arguments(true, text, "is full of 1funny"), + arguments(true, text, "This world is"), + arguments(false, text, "A Full Yes Or No"), + }); + } + + /** + * Common arguments for Text.FUZZY and Text.NOT_FUZZY tests + * + * @return + */ + private static Stream textFuzzyCommon() { + return Arrays.stream(new Arguments[] { + // Short + arguments(true, shortValue, "ah"), + arguments(false, shortValue, "ai"), + + // Medium + arguments(true, mediumValue, "hop"), + arguments(true, mediumValue, "hopp"), + arguments(true, mediumValue, "hap"), + arguments(false, mediumValue, "ha"), + arguments(false, mediumValue, "hoopp"), + + // Long + arguments(true, longValue, "surprises"), + arguments(true, longValue, "surpprises"), + arguments(true, longValue, "sutprises"), + arguments(true, longValue, "surprise"), + arguments(false, longValue, "surppirsses") + }); + } + + /** + * Common arguments for Text.CONTAINS_FUZZY and Text.NOT_CONTAINS_FUZZY tests + * + * @return + */ + private static Stream textContainsFuzzyCommon() { + return Arrays.stream(new Arguments[] { + // Short + arguments(true, shortValue, "ah"), + arguments(false, shortValue, "ai"), + + // Medium + arguments(true, mediumValue, "hop"), + arguments(true, mediumValue, "hopp"), + arguments(true, mediumValue, "hap"), + arguments(false, mediumValue, "ha"), + arguments(false, mediumValue, "hoopp"), + + // Long + arguments(true, longValue, "surprises"), + arguments(true, longValue, "surpprises"), + arguments(true, longValue, "sutprises"), + 
arguments(true, longValue, "surprise"), + arguments(false, longValue, "surppirsses"), + + // Short + arguments(true, text, "is"), + arguments(false, text, "si"), + + // Medium + arguments(true, text, "full"), + arguments(true, text, "fully"), + arguments(true, text, "ful"), + arguments(true, text, "fill"), + arguments(false, text, "fu"), + arguments(false, text, "fullest"), + + // Long + arguments(true, text, "surprises"), + arguments(true, text, "Surpprises"), + arguments(true, text, "Sutrises"), + arguments(true, text, "surprise"), + arguments(false, text, "surppirsses"), + }); + } + + /** + * Generates arguments for Text.CONTAINS tests + * + * @return + */ + public static Stream textContains() { + return addPredicate(Text.CONTAINS, textContainsCommon()); + } + + /** + * Generates arguments for Text.NOT_CONTAINS tests + * + * @return + */ + public static Stream textNotContains() { + return addPredicate(Text.NOT_CONTAINS, negate(textContainsCommon())); + } + + /** + * Generates arguments for Text.PREFIX tests + * + * @return + */ + public static Stream textPrefix() { + return addPredicate(Text.PREFIX, textPrefixCommon()); + } + + /** + * Generates arguments for Text.NOT_PREFIX tests + * + * @return + */ + public static Stream textNotPrefix() { + return addPredicate(Text.NOT_PREFIX, negate(textPrefixCommon())); + } + + /** + * Generates arguments for Text.CONTAINS_PREFIX tests + * + * @return + */ + public static Stream textContainsPrefix() { + return addPredicate(Text.CONTAINS_PREFIX, textContainsPrefixCommon()); + } + + /** + * Generates arguments for Text.NOT_CONTAINS_PREFIX tests + * + * @return + */ + public static Stream textNotContainsPrefix() { + return addPredicate(Text.NOT_CONTAINS_PREFIX, negate(textContainsPrefixCommon())); + } + + /** + * Generates arguments for Text.REGEX tests + * + * @return + */ + public static Stream textRegex() { + return addPredicate(Text.REGEX, textRegexCommon()); + } + + /** + * Generates arguments for Text.NOT_REGEX tests + * 
+ * @return + */ + public static Stream textNotRegex() { + return addPredicate(Text.NOT_REGEX, negate(textRegexCommon())); + } + + /** + * Generates arguments for Text.CONTAINS_REGEX tests + * + * @return + */ + public static Stream textContainsRegex() { + return addPredicate(Text.CONTAINS_REGEX, textContainsRegexCommon()); + } + + /** + * Generates arguments for Text.NOT_CONTAINS_REGEX tests + * + * @return + */ + public static Stream textNotContainsRegex() { + return addPredicate(Text.NOT_CONTAINS_REGEX, negate(textContainsRegexCommon())); + } + + /** + * Generates arguments for Text.CONTAINS_PHRASE tests + * + * @return + */ + public static Stream textContainsPhrase() { + return addPredicate(Text.CONTAINS_PHRASE, textContainsPhraseCommon()); + } + + /** + * Generates arguments for Text.NOT_CONTAINS_PHRASE tests + * + * @return + */ + public static Stream textNotContainsPhrase() { + return addPredicate(Text.NOT_CONTAINS_PHRASE, negate(textContainsPhraseCommon())); + } + + /** + * Generates arguments for Text.FUZZY tests + * + * @return + */ + public static Stream textFuzzy() { + return addPredicate(Text.FUZZY, textFuzzyCommon()); + } + + /** + * Generates arguments for Text.NOT_FUZZY tests + * + * @return + */ + public static Stream textNotFuzzy() { + return addPredicate(Text.NOT_FUZZY, negate(textFuzzyCommon())); + } + + /** + * Generates arguments for Text.CONTAINS_FUZZY tests + * + * @return + */ + public static Stream textContainsFuzzy() { + return addPredicate(Text.CONTAINS_FUZZY, textContainsFuzzyCommon()); + } + + /** + * Generates arguments for Text.NOT_CONTAINS_FUZZY tests + * + * @return + */ + public static Stream textNotContainsFuzzy() { + return addPredicate(Text.NOT_CONTAINS_FUZZY, negate(textContainsFuzzyCommon())); + } + + /** + * Returns an argument list for all string predicates + * + * @return + */ + public static Stream string() { + return Stream.of( + textFuzzy(), + textNotFuzzy(), + + textPrefix(), + textNotPrefix(), + + textRegex(), + 
textNotRegex() + ).flatMap(ii -> ii); + } + + /** + * Returns an argument list for all text predicates + * + * @return + */ + public static Stream text() { + return Stream.of( + textContains(), + textNotContains(), + + textContainsFuzzy(), + textNotContainsFuzzy(), + + textContainsPhrase(), + textNotContainsPhrase(), + + textContainsPrefix(), + textNotContainsPrefix(), + + textContainsRegex(), + textNotContainsRegex() + ).flatMap(ii -> ii); + } + + /** + * Returns an argument list for all predicates + * + * @return + */ + public static Stream all() { + return Stream.concat(string(), text()); + } +} diff --git a/janusgraph-backend-testutils/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java b/janusgraph-backend-testutils/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java index eec58ea8419..dbc91e95ca6 100644 --- a/janusgraph-backend-testutils/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java +++ b/janusgraph-backend-testutils/src/main/java/org/janusgraph/diskstorage/indexing/IndexProviderTest.java @@ -33,9 +33,12 @@ import org.janusgraph.graphdb.types.ParameterType; import org.janusgraph.testutil.RandomGenerator; +import org.junit.Assume; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; import org.mockito.Mockito; import java.time.Duration; @@ -45,6 +48,7 @@ import java.util.stream.Stream; import static org.junit.jupiter.api.Assertions.*; +import static org.hamcrest.Matchers.is; /** * @author Matthias Broecheler (me@matthiasb.com) @@ -1197,10 +1201,79 @@ public void clearStorageTest() throws Exception { assertFalse(index.exists()); } + @ParameterizedTest + @MethodSource("org.janusgraph.core.attribute.TextArgument#text") + public void testTextPredicate(JanusGraphPredicate predicate, boolean expected, String value, String 
condition) throws BackendException { + assumeIndexSupportFor(Mapping.TEXT, predicate); + initializeWithDoc("vertex", "test1", TEXT, value, true); + testPredicateByCount((expected) ? 1 : 0, predicate, TEXT, condition); + } + + @ParameterizedTest + @MethodSource("org.janusgraph.core.attribute.TextArgument#string") + public void testStringPredicate(JanusGraphPredicate predicate, boolean expected, String value, String condition) throws BackendException { + assumeIndexSupportFor(Mapping.STRING, predicate); + initializeWithDoc("vertex", "test1", NAME, value, true); + testPredicateByCount((expected) ? 1 : 0, predicate, NAME, condition); + } + /* ================================================================================== HELPER METHODS ==================================================================================*/ + /** + * Initialize the store, and add a test document with the provided + * field/value pair. + * + * @param store + * @param docId + * @param field + * @param value + * @param isNew + * @throws BackendException + */ + private void initializeWithDoc(String store, String docId, String field, String value, boolean isNew) throws BackendException { + initialize(store); + + Multimap doc = HashMultimap.create(); + doc.put(field, value); + + add(store, docId, doc, isNew); + + clopen(); + } + + /** + * Tests the index to ensure it supports the provided mapping, + * and the provided predicate. 
+ * + * @param mapping + * @param predicate + */ + protected void assumeIndexSupportFor(Mapping mapping, JanusGraphPredicate predicate) { + Assume.assumeThat("Index supports mapping"+mapping, indexFeatures.supportsStringMapping(mapping), is(true)); + Assume.assumeThat("Index supports predicate "+predicate+" for mapping "+mapping, supportsPredicateFor(mapping, predicate), is(true)); + } + + protected boolean supportsPredicateFor(Mapping mapping, Class dataType, Cardinality cardinality, JanusGraphPredicate predicate) { + return index.supports(new StandardKeyInformation(dataType, cardinality, mapping.asParameter()), predicate); + } + + protected boolean supportsPredicateFor(Mapping mapping, Class dataType, JanusGraphPredicate predicate) { + return supportsPredicateFor(mapping, dataType, Cardinality.SINGLE, predicate); + } + + protected boolean supportsPredicateFor(Mapping mapping, JanusGraphPredicate predicate) { + return supportsPredicateFor(mapping, String.class, Cardinality.SINGLE, predicate); + } + + protected long getDocCountByPredicate(JanusGraphPredicate predicate, String field, String condition) throws BackendException { + return tx.queryStream(new IndexQuery("vertex", PredicateCondition.of(field, predicate, condition))).count(); + } + + private void testPredicateByCount(long expectation, JanusGraphPredicate predicate, String field, String condition) throws BackendException { + assertEquals(expectation, getDocCountByPredicate(predicate, field, condition)); + } protected void initialize(String store) throws BackendException { for (final Map.Entry info : allKeys.entrySet()) { diff --git a/janusgraph-driver/src/main/java/org/janusgraph/core/attribute/Text.java b/janusgraph-driver/src/main/java/org/janusgraph/core/attribute/Text.java index ac69bf27bb9..1d7e38e9d48 100644 --- a/janusgraph-driver/src/main/java/org/janusgraph/core/attribute/Text.java +++ b/janusgraph-driver/src/main/java/org/janusgraph/core/attribute/Text.java @@ -56,21 +56,71 @@ public boolean 
evaluateRaw(String value, String terms) { return true; } - @Override public boolean isValidCondition(Object condition) { return condition instanceof String && StringUtils.isNotBlank((String) condition); } + + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.NOT_CONTAINS; + } + @Override public String toString() { return "textContains"; } + + }, + + /** + * Whether the text doesn't contain a given term as a token in the text (case insensitive) + */ + NOT_CONTAINS { + + @Override + public boolean test(Object value, Object condition) { + this.preevaluate(value, condition); + return value != null && evaluateRaw(value.toString(), (String) condition); + } + + @Override + public boolean evaluateRaw(String value, String terms) { + return !CONTAINS.evaluateRaw(value, terms); + } + + @Override + public boolean isValidCondition(Object condition) { + return CONTAINS.isValidCondition(condition); + } + + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.CONTAINS; + } + + @Override + public String toString() { + return "textNotContains"; + } + }, /** * Whether the text contains a token that starts with a given term (case insensitive) */ CONTAINS_PREFIX { + @Override public boolean test(Object value, Object condition) { this.preevaluate(value, condition); @@ -90,6 +140,16 @@ public boolean isValidCondition(Object condition) { return condition instanceof String; } + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.NOT_CONTAINS_PREFIX; + } + @Override public String toString() { return "textContainsPrefix"; @@ -97,10 +157,49 @@ public String toString() { }, + /** + * Whether the text doesn't contain a token that starts with a given term (case insensitive) + */ + NOT_CONTAINS_PREFIX { + + @Override + public boolean test(Object value, Object 
condition) { + this.preevaluate(value, condition); + return value != null && evaluateRaw(value.toString(), (String) condition); + } + + @Override + public boolean evaluateRaw(String value, String prefix) { + return !CONTAINS_PREFIX.evaluateRaw(value, prefix); + } + + @Override + public boolean isValidCondition(Object condition) { + return CONTAINS_PREFIX.isValidCondition(condition); + } + + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.CONTAINS_PREFIX; + } + + @Override + public String toString() { + return "textNotContainsPrefix"; + } + + }, + /** * Whether the text contains a token that matches a regular expression */ CONTAINS_REGEX { + @Override public boolean test(Object value, Object condition) { this.preevaluate(value, condition); @@ -120,6 +219,16 @@ public boolean isValidCondition(Object condition) { return condition instanceof String && StringUtils.isNotBlank(condition.toString()); } + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.NOT_CONTAINS_REGEX; + } + @Override public String toString() { return "textContainsRegex"; @@ -127,10 +236,49 @@ public String toString() { }, + /** + * Whether the text doesn't contain a token that matches a regular expression + */ + NOT_CONTAINS_REGEX { + + @Override + public boolean test(Object value, Object condition) { + this.preevaluate(value, condition); + return value != null && evaluateRaw(value.toString(), (String) condition); + } + + @Override + public boolean evaluateRaw(String value, String regex) { + return !CONTAINS_REGEX.evaluateRaw(value, regex); + } + + @Override + public boolean isValidCondition(Object condition) { + return CONTAINS_REGEX.isValidCondition(condition); + } + + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.CONTAINS_REGEX; + } + + @Override + public 
String toString() { + return "textNotContainsRegex"; + } + + }, + /** * Whether the text starts with a given prefix (case sensitive) */ PREFIX { + @Override public boolean test(Object value, Object condition) { this.preevaluate(value, condition); @@ -147,6 +295,16 @@ public boolean isValidCondition(Object condition) { return condition instanceof String; } + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.NOT_PREFIX; + } + @Override public String toString() { return "textPrefix"; @@ -154,10 +312,49 @@ public String toString() { }, + /** + * Whether the text doesn't start with a given prefix (case sensitive) + */ + NOT_PREFIX { + + @Override + public boolean test(Object value, Object condition) { + this.preevaluate(value, condition); + return value != null && evaluateRaw(value.toString(), (String) condition); + } + + @Override + public boolean evaluateRaw(String value, String prefix) { + return !value.startsWith(prefix.trim()); + } + + @Override + public boolean isValidCondition(Object condition) { + return PREFIX.isValidCondition(condition); + } + + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.PREFIX; + } + + @Override + public String toString() { + return "textNotPrefix"; + } + + }, + /** * Whether the text matches a regular expression (case sensitive) */ REGEX { + @Override public boolean test(Object value, Object condition) { this.preevaluate(value, condition); @@ -173,12 +370,59 @@ public boolean isValidCondition(Object condition) { return condition instanceof String && StringUtils.isNotBlank(condition.toString()); } + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.NOT_REGEX; + } + @Override public String toString() { return "textRegex"; } - }, + }, + + /** + * Whether the text fails a regular expression (case 
sensitive) + */ + NOT_REGEX { + + @Override + public boolean test(Object value, Object condition) { + this.preevaluate(value, condition); + return value != null && evaluateRaw(value.toString(), (String) condition); + } + + public boolean evaluateRaw(String value, String regex) { + return !value.matches(regex); + } + + @Override + public boolean isValidCondition(Object condition) { + return REGEX.isValidCondition(condition); + } + + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.REGEX; + } + + @Override + public String toString() { + return "textNotRegex"; + } + + }, /** * Whether the text is at X Levenshtein of a token (case sensitive) @@ -188,6 +432,7 @@ public String toString() { * - 2 for strings of more than five characters */ FUZZY { + @Override public boolean test(Object value, Object condition) { this.preevaluate(value, condition); @@ -203,12 +448,63 @@ public boolean evaluateRaw(String value, String term) { public boolean isValidCondition(Object condition) { return condition instanceof String && StringUtils.isNotBlank(condition.toString()); } + + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.NOT_FUZZY; + } + @Override public String toString() { return "textFuzzy"; } - }, + }, + + /** + * Whether the text is not at X Levenshtein of a token (case sensitive) + * with X=: + * - 0 for strings of one or two characters + * - 1 for strings of three, four or five characters + * - 2 for strings of more than five characters + */ + NOT_FUZZY { + + @Override + public boolean test(Object value, Object condition) { + this.preevaluate(value, condition); + return value != null && evaluateRaw(value.toString(), (String) condition); + } + + @Override + public boolean evaluateRaw(String value, String term) { return !isFuzzy(term.trim(),value.trim()); } + + @Override + public boolean isValidCondition(Object 
condition) { + return FUZZY.isValidCondition(condition); + } + + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.FUZZY; + } + + @Override + public String toString() { + return "textNotFuzzy"; + } + + }, /** * Whether the text contains a token is at X Levenshtein of a token (case insensitive) @@ -218,6 +514,7 @@ public String toString() { * - 2 for strings of more than five characters */ CONTAINS_FUZZY { + @Override public boolean test(Object value, Object condition) { this.preevaluate(value, condition); @@ -236,13 +533,162 @@ public boolean evaluateRaw(String value, String term) { public boolean isValidCondition(Object condition) { return condition instanceof String && StringUtils.isNotBlank(condition.toString()); } + + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.NOT_CONTAINS_FUZZY; + } + @Override public String toString() { return "textContainsFuzzy"; } + }, + + /** + * Whether the text doesn't contain a token that is at X Levenshtein of a token (case insensitive) + * with X=: + * - 0 for strings of one or two characters + * - 1 for strings of three, four or five characters + * - 2 for strings of more than five characters + */ + NOT_CONTAINS_FUZZY { + + @Override + public boolean test(Object value, Object condition) { + this.preevaluate(value, condition); + return value != null && evaluateRaw(value.toString(), (String) condition); + } + + @Override + public boolean evaluateRaw(String value, String term) { + return !CONTAINS_FUZZY.evaluateRaw(value, term); + } + + @Override + public boolean isValidCondition(Object condition) { + return CONTAINS_FUZZY.isValidCondition(condition); + } + + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.CONTAINS_FUZZY; + } + + @Override + public String toString() { + return 
"textNotContainsFuzzy"; + } + + }, + + /** + * Whether the text contains a given token sequence in the text (case insensitive) + */ + CONTAINS_PHRASE { + + @Override + public boolean test(Object value, Object condition) { + this.preevaluate(value, condition); + return value != null && evaluateRaw(value.toString(), (String) condition); + } + + @Override + public boolean evaluateRaw(String value, String terms) { + List valueTerms = tokenize(value.trim().toLowerCase()); + List tokenTerms = tokenize(terms.trim().toLowerCase()); + if (!terms.isEmpty() && tokenTerms.isEmpty()) return false; + return (Collections.indexOfSubList(valueTerms, tokenTerms) != -1); + } + + @Override + public boolean isValidCondition(Object condition) { + return condition instanceof String && StringUtils.isNotBlank((String) condition); + } + + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.NOT_CONTAINS_PHRASE; + } + + @Override + public String toString() { + return "textContainsPhrase"; + } + + }, + + /** + * Whether the text does not contain a given token sequence in the text (case insensitive) + */ + NOT_CONTAINS_PHRASE { + + @Override + public boolean test(Object value, Object condition) { + this.preevaluate(value, condition); + return value != null && evaluateRaw(value.toString(), (String) condition); + } + + @Override + public boolean evaluateRaw(String value, String terms) { + return !CONTAINS_PHRASE.evaluateRaw(value, terms); + } + + @Override + public boolean isValidCondition(Object condition) { + return CONTAINS_PHRASE.isValidCondition(condition); + } + + @Override + public boolean hasNegation() { + return true; + } + + @Override + public JanusGraphPredicate negate() { + return Text.CONTAINS_PHRASE; + } + + @Override + public String toString() { + return "textNotContainsPhrase"; + } + }; + /** + * Calculates the max fuzzy edit distance for a term given its length + * - 0 for strings of one or two 
characters + * - 1 for strings of three, four or five characters + * - 2 for strings of more than five characters + * @param term + * @return + */ + public static int getMaxEditDistance(String term) { + if (term.length() < 3) + return 0; + else if (term.length() < 6) + return 1; + else + return 2; + } + private static final LevenshteinDistance ONE_LEVENSHTEIN_DISTANCE = new LevenshteinDistance(1); private static final LevenshteinDistance TWO_LEVENSHTEIN_DISTANCE = new LevenshteinDistance(2); @@ -316,27 +762,55 @@ public boolean isQNF() { //////////////// statics public final static Set HAS_CONTAINS = Collections - .unmodifiableSet(EnumSet.of(CONTAINS, CONTAINS_PREFIX, CONTAINS_REGEX, CONTAINS_FUZZY)); + .unmodifiableSet(EnumSet.of(CONTAINS, CONTAINS_PREFIX, CONTAINS_REGEX, CONTAINS_FUZZY, CONTAINS_PHRASE, + NOT_CONTAINS, NOT_CONTAINS_PREFIX, NOT_CONTAINS_REGEX, NOT_CONTAINS_FUZZY, NOT_CONTAINS_PHRASE)); public static JanusGraphP textContains(final V value) { return new JanusGraphP(Text.CONTAINS, value); } + public static JanusGraphP textNotContains(final V value) { + return new JanusGraphP(Text.NOT_CONTAINS, value); + } public static JanusGraphP textContainsPrefix(final V value) { return new JanusGraphP(Text.CONTAINS_PREFIX, value); } + public static JanusGraphP textNotContainsPrefix(final V value) { + return new JanusGraphP(Text.NOT_CONTAINS_PREFIX, value); + } public static JanusGraphP textContainsRegex(final V value) { return new JanusGraphP(Text.CONTAINS_REGEX, value); } + public static JanusGraphP textNotContainsRegex(final V value) { + return new JanusGraphP(Text.NOT_CONTAINS_REGEX, value); + } public static JanusGraphP textPrefix(final V value) { return new JanusGraphP(Text.PREFIX, value); } + public static JanusGraphP textNotPrefix(final V value) { + return new JanusGraphP(Text.NOT_PREFIX, value); + } public static JanusGraphP textRegex(final V value) { return new JanusGraphP(Text.REGEX, value); } + public static JanusGraphP textNotRegex(final V value) { + 
return new JanusGraphP(Text.NOT_REGEX, value); + } public static JanusGraphP textContainsFuzzy(final V value) { return new JanusGraphP(Text.CONTAINS_FUZZY, value); } + public static JanusGraphP textNotContainsFuzzy(final V value) { + return new JanusGraphP(Text.NOT_CONTAINS_FUZZY, value); + } public static JanusGraphP textFuzzy(final V value) { return new JanusGraphP(Text.FUZZY, value); } + public static JanusGraphP textNotFuzzy(final V value) { + return new JanusGraphP(Text.NOT_FUZZY, value); + } + public static JanusGraphP textContainsPhrase(final V value) { + return new JanusGraphP(Text.CONTAINS_PHRASE, value); + } + public static JanusGraphP textNotContainsPhrase(final V value) { + return new JanusGraphP(Text.NOT_CONTAINS_PHRASE, value); + } } diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java index 85c270d9418..18799c049db 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/ElasticSearchIndex.java @@ -964,22 +964,42 @@ public Map getFilter(Condition condition, KeyInformation.Store if (predicate == Text.CONTAINS || predicate == Cmp.EQUAL) { return compat.match(fieldName, value); + } else if (predicate == Text.NOT_CONTAINS) { + return compat.boolMustNot(compat.match(fieldName, value)); + } else if (predicate == Text.CONTAINS_PHRASE) { + return compat.matchPhrase(fieldName, value); + } else if (predicate == Text.NOT_CONTAINS_PHRASE) { + return compat.boolMustNot(compat.matchPhrase(fieldName, value)); } else if (predicate == Text.CONTAINS_PREFIX) { if (!ParameterType.TEXT_ANALYZER.hasParameter(information.get(key).getParameters())) value = ((String) value).toLowerCase(); return compat.prefix(fieldName, value); + } else if (predicate == Text.NOT_CONTAINS_PREFIX) { + if 
(!ParameterType.TEXT_ANALYZER.hasParameter(information.get(key).getParameters())) + value = ((String) value).toLowerCase(); + return compat.boolMustNot(compat.prefix(fieldName, value)); } else if (predicate == Text.CONTAINS_REGEX) { if (!ParameterType.TEXT_ANALYZER.hasParameter(information.get(key).getParameters())) value = ((String) value).toLowerCase(); return compat.regexp(fieldName, value); + } else if (predicate == Text.NOT_CONTAINS_REGEX) { + if (!ParameterType.TEXT_ANALYZER.hasParameter(information.get(key).getParameters())) + value = ((String) value).toLowerCase(); + return compat.boolMustNot(compat.regexp(fieldName, value)); } else if (predicate == Text.PREFIX) { return compat.prefix(fieldName, value); + } else if (predicate == Text.NOT_PREFIX) { + return compat.boolMustNot(compat.prefix(fieldName, value)); } else if (predicate == Text.REGEX) { return compat.regexp(fieldName, value); + } else if (predicate == Text.NOT_REGEX) { + return compat.boolMustNot(compat.regexp(fieldName, value)); } else if (predicate == Cmp.NOT_EQUAL) { return compat.boolMustNot(compat.match(fieldName, value)); } else if (predicate == Text.FUZZY || predicate == Text.CONTAINS_FUZZY) { return compat.fuzzyMatch(fieldName, value); + } else if (predicate == Text.NOT_FUZZY || predicate == Text.NOT_CONTAINS_FUZZY) { + return compat.boolMustNot(compat.fuzzyMatch(fieldName, value)); } else if (predicate == Cmp.LESS_THAN) { return compat.lt(fieldName, value); } else if (predicate == Cmp.LESS_THAN_EQUAL) { @@ -1274,11 +1294,16 @@ public boolean supports(KeyInformation information, JanusGraphPredicate janusgra switch(mapping) { case DEFAULT: case TEXT: - return janusgraphPredicate == Text.CONTAINS || janusgraphPredicate == Text.CONTAINS_PREFIX - || janusgraphPredicate == Text.CONTAINS_REGEX || janusgraphPredicate == Text.CONTAINS_FUZZY; + return janusgraphPredicate == Text.CONTAINS || janusgraphPredicate == Text.NOT_CONTAINS + || janusgraphPredicate == Text.CONTAINS_FUZZY || 
janusgraphPredicate == Text.NOT_CONTAINS_FUZZY + || janusgraphPredicate == Text.CONTAINS_PREFIX || janusgraphPredicate == Text.NOT_CONTAINS_PREFIX + || janusgraphPredicate == Text.CONTAINS_REGEX || janusgraphPredicate == Text.NOT_CONTAINS_REGEX + || janusgraphPredicate == Text.CONTAINS_PHRASE || janusgraphPredicate == Text.NOT_CONTAINS_PHRASE; case STRING: - return janusgraphPredicate instanceof Cmp || janusgraphPredicate==Text.REGEX - || janusgraphPredicate==Text.PREFIX || janusgraphPredicate == Text.FUZZY; + return janusgraphPredicate instanceof Cmp + || janusgraphPredicate==Text.REGEX || janusgraphPredicate==Text.NOT_REGEX + || janusgraphPredicate==Text.PREFIX || janusgraphPredicate==Text.NOT_PREFIX + || janusgraphPredicate == Text.FUZZY || janusgraphPredicate == Text.NOT_FUZZY; case TEXTSTRING: return janusgraphPredicate instanceof Text || janusgraphPredicate instanceof Cmp; } diff --git a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/compat/AbstractESCompat.java b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/compat/AbstractESCompat.java index d2e2ff3fd56..bed525604bc 100644 --- a/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/compat/AbstractESCompat.java +++ b/janusgraph-es/src/main/java/org/janusgraph/diskstorage/es/compat/AbstractESCompat.java @@ -148,6 +148,10 @@ public Map match(String key, Object value) { return match(key, value, null); } + public Map matchPhrase(String key, Object value) { + return ImmutableMap.of("match_phrase", ImmutableMap.of(key, value)); + } + public Map fuzzyMatch(String key, Object value) { return match(key, value, "AUTO"); } diff --git a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticsearchIndexTest.java b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticsearchIndexTest.java index 9d3d6b80250..42068d4cbd7 100644 --- a/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticsearchIndexTest.java +++ 
b/janusgraph-es/src/test/java/org/janusgraph/diskstorage/es/ElasticsearchIndexTest.java @@ -138,14 +138,25 @@ public Configuration makeESTestConfig(String index, CommonsConfiguration cc) { @Test public void testSupport() { assertTrue(index.supports(of(String.class, Cardinality.SINGLE), Text.CONTAINS)); + assertTrue(index.supports(of(String.class, Cardinality.SINGLE), Text.NOT_CONTAINS)); assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.TEXT.asParameter()), Text.CONTAINS_PREFIX)); + assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.TEXT.asParameter()), Text.NOT_CONTAINS_PREFIX)); + assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.TEXT.asParameter()), Text.CONTAINS_PHRASE)); + assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.TEXT.asParameter()), Text.NOT_CONTAINS_PHRASE)); assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.TEXT.asParameter()), Text.CONTAINS_REGEX)); + assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.TEXT.asParameter()), Text.NOT_CONTAINS_REGEX)); assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.TEXT.asParameter()), Text.CONTAINS_FUZZY)); + assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.TEXT.asParameter()), Text.NOT_CONTAINS_FUZZY)); assertFalse(index.supports(of(String.class, Cardinality.SINGLE, Mapping.TEXT.asParameter()), Text.REGEX)); + assertFalse(index.supports(of(String.class, Cardinality.SINGLE, Mapping.TEXT.asParameter()), Text.NOT_REGEX)); assertFalse(index.supports(of(String.class, Cardinality.SINGLE, Mapping.STRING.asParameter()), Text.CONTAINS)); + assertFalse(index.supports(of(String.class, Cardinality.SINGLE, Mapping.STRING.asParameter()), Text.NOT_CONTAINS)); assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.STRING.asParameter()), Text.PREFIX)); + assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.STRING.asParameter()), 
Text.NOT_PREFIX)); assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.STRING.asParameter()), Text.FUZZY)); + assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.STRING.asParameter()), Text.NOT_FUZZY)); assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.STRING.asParameter()), Text.REGEX)); + assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.STRING.asParameter()), Text.NOT_REGEX)); assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.STRING.asParameter()), Cmp.EQUAL)); assertTrue(index.supports(of(String.class, Cardinality.SINGLE, Mapping.STRING.asParameter()), Cmp.NOT_EQUAL)); @@ -503,5 +514,4 @@ private boolean indexExists(String name) throws IOException { IOUtils.closeQuietly(response); return exists; } - } diff --git a/janusgraph-lucene/src/main/java/org/janusgraph/diskstorage/lucene/LuceneIndex.java b/janusgraph-lucene/src/main/java/org/janusgraph/diskstorage/lucene/LuceneIndex.java index 9383dd2d7f3..2ebc9bb8f85 100644 --- a/janusgraph-lucene/src/main/java/org/janusgraph/diskstorage/lucene/LuceneIndex.java +++ b/janusgraph-lucene/src/main/java/org/janusgraph/diskstorage/lucene/LuceneIndex.java @@ -748,12 +748,12 @@ private SearchParams convertQuery(Condition condition, final KeyInformation.S } else if (janusgraphPredicate == Cmp.EQUAL || janusgraphPredicate == Cmp.NOT_EQUAL) { tokenize(params, map, delegatingAnalyzer, (String) value, stringFieldKey, janusgraphPredicate); } else if (janusgraphPredicate == Text.FUZZY) { - params.addQuery(new FuzzyQuery(new Term(stringFieldKey, (String) value))); + params.addQuery(new FuzzyQuery(new Term(stringFieldKey, (String) value), Text.getMaxEditDistance((String) value))); } else if (janusgraphPredicate == Text.CONTAINS_FUZZY) { value = ((String) value).toLowerCase(); final Builder b = new BooleanQuery.Builder(); for (final String term : Text.tokenize((String) value)) { - b.add(new FuzzyQuery(new Term(key, term)), 
BooleanClause.Occur.MUST); + b.add(new FuzzyQuery(new Term(key, term), Text.getMaxEditDistance(term)), BooleanClause.Occur.MUST); } params.addQuery(b.build()); } else diff --git a/janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java b/janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java index 51dcf7718b8..adc9d252ca3 100644 --- a/janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java +++ b/janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java @@ -771,7 +771,7 @@ public String buildQueryFilter(Condition condition, KeyInform return ("-" + key + ":\"" + escapeValue(value) + "\""); } } else if (predicate == Text.FUZZY || predicate == Text.CONTAINS_FUZZY) { - return (key + ":"+escapeValue(value)+"~"); + return (key + ":"+escapeValue(value)+"~"+Text.getMaxEditDistance(value.toString())); } else if (predicate == Cmp.LESS_THAN) { return (key + ":[* TO \"" + escapeValue(value) + "\"}"); } else if (predicate == Cmp.LESS_THAN_EQUAL) { diff --git a/janusgraph-test/src/test/java/org/janusgraph/graphdb/attribute/GeoshapeTest.java b/janusgraph-test/src/test/java/org/janusgraph/core/attribute/GeoshapeTest.java similarity index 99% rename from janusgraph-test/src/test/java/org/janusgraph/graphdb/attribute/GeoshapeTest.java rename to janusgraph-test/src/test/java/org/janusgraph/core/attribute/GeoshapeTest.java index 2c564d7b14f..63724908dc6 100644 --- a/janusgraph-test/src/test/java/org/janusgraph/graphdb/attribute/GeoshapeTest.java +++ b/janusgraph-test/src/test/java/org/janusgraph/core/attribute/GeoshapeTest.java @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package org.janusgraph.graphdb.attribute; +package org.janusgraph.core.attribute; import org.janusgraph.core.attribute.GeoshapeSerializer; import org.locationtech.spatial4j.context.jts.JtsSpatialContext; diff --git a/janusgraph-test/src/test/java/org/janusgraph/core/attribute/TextTest.java b/janusgraph-test/src/test/java/org/janusgraph/core/attribute/TextTest.java new file mode 100644 index 00000000000..951ea0b4034 --- /dev/null +++ b/janusgraph-test/src/test/java/org/janusgraph/core/attribute/TextTest.java @@ -0,0 +1,126 @@ +// Copyright 2017 JanusGraph Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package org.janusgraph.core.attribute; + +import org.janusgraph.graphdb.query.JanusGraphPredicate; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; + +/** + * @author Andrew Sheppard (andrew.sheppard@fireeye.com) + */ +public class TextTest { + + @ParameterizedTest + @MethodSource("org.janusgraph.core.attribute.TextArgument#all") + public void testTextPredicate(JanusGraphPredicate predicate, boolean expected, String value, String condition) { + assertEquals(expected, predicate.test(value, condition)); + } + + @Test + public void testNegate() { + assertTrue(Text.CONTAINS.hasNegation()); + assertEquals(Text.CONTAINS, Text.NOT_CONTAINS.negate()); + + assertTrue(Text.NOT_CONTAINS.hasNegation()); + assertEquals(Text.NOT_CONTAINS, Text.CONTAINS.negate()); + + assertTrue(Text.CONTAINS_PREFIX.hasNegation()); + assertEquals(Text.CONTAINS_PREFIX, Text.NOT_CONTAINS_PREFIX.negate()); + + assertTrue(Text.NOT_CONTAINS_PREFIX.hasNegation()); + assertEquals(Text.NOT_CONTAINS_PREFIX, Text.CONTAINS_PREFIX.negate()); + + assertTrue(Text.CONTAINS_REGEX.hasNegation()); + assertEquals(Text.CONTAINS_REGEX, Text.NOT_CONTAINS_REGEX.negate()); + + assertTrue(Text.NOT_CONTAINS_REGEX.hasNegation()); + assertEquals(Text.NOT_CONTAINS_REGEX, Text.CONTAINS_REGEX.negate()); + + assertTrue(Text.CONTAINS_FUZZY.hasNegation()); + assertEquals(Text.CONTAINS_FUZZY, Text.NOT_CONTAINS_FUZZY.negate()); + + assertTrue(Text.NOT_CONTAINS_FUZZY.hasNegation()); + assertEquals(Text.NOT_CONTAINS_FUZZY, Text.CONTAINS_FUZZY.negate()); + + assertTrue(Text.CONTAINS_PHRASE.hasNegation()); + assertEquals(Text.CONTAINS_PHRASE, Text.NOT_CONTAINS_PHRASE.negate()); + + assertTrue(Text.NOT_CONTAINS_PHRASE.hasNegation()); + 
assertEquals(Text.NOT_CONTAINS_PHRASE, Text.CONTAINS_PHRASE.negate()); + + assertTrue(Text.PREFIX.hasNegation()); + assertEquals(Text.PREFIX, Text.NOT_PREFIX.negate()); + + assertTrue(Text.NOT_PREFIX.hasNegation()); + assertEquals(Text.NOT_PREFIX, Text.PREFIX.negate()); + + assertTrue(Text.REGEX.hasNegation()); + assertEquals(Text.REGEX, Text.NOT_REGEX.negate()); + + assertTrue(Text.NOT_REGEX.hasNegation()); + assertEquals(Text.NOT_REGEX, Text.REGEX.negate()); + + assertTrue(Text.FUZZY.hasNegation()); + assertEquals(Text.FUZZY, Text.NOT_FUZZY.negate()); + + assertTrue(Text.NOT_FUZZY.hasNegation()); + assertEquals(Text.NOT_FUZZY, Text.FUZZY.negate()); + } + + /** + * Test for tokenization MIN_TOKEN_LENGTH + */ + @Test + public void testTextContainsSmallTokens() { + assertFalse(Text.CONTAINS.test(TextArgument.text, "a")); + assertFalse(Text.CONTAINS.test(TextArgument.text, "A")); + + assertTrue(Text.NOT_CONTAINS.test(TextArgument.text, "a")); + assertTrue(Text.NOT_CONTAINS.test(TextArgument.text, "A")); + + assertFalse(Text.CONTAINS_PHRASE.test(TextArgument.text, "a")); + assertFalse(Text.CONTAINS_PHRASE.test(TextArgument.text, "A")); + + assertTrue(Text.NOT_CONTAINS_PHRASE.test(TextArgument.text, "a")); + assertTrue(Text.NOT_CONTAINS_PHRASE.test(TextArgument.text, "A")); + } + + /** + * Test for support of regex character groups + */ + @Test + public void testTextRegexCharacterGroups() { + assertTrue(Text.CONTAINS_REGEX.test(TextArgument.text, "\\dfu\\w*")); + assertTrue(Text.REGEX.test("1funny", "\\dfu\\w*")); + + assertFalse(Text.NOT_CONTAINS_REGEX.test(TextArgument.text, "\\dfu\\w*")); + assertFalse(Text.NOT_REGEX.test("1funny", "\\dfu\\w*")); + } + + @Test + public void testCmp() { + assertTrue(Cmp.EQUAL.test(TextArgument.name, TextArgument.name)); + assertFalse(Cmp.EQUAL.test("fullly funny", TextArgument.name)); + + assertFalse(Cmp.NOT_EQUAL.test(TextArgument.name, TextArgument.name)); + assertTrue(Cmp.NOT_EQUAL.test("fullly funny", TextArgument.name)); + } +} 
diff --git a/janusgraph-test/src/test/java/org/janusgraph/graphdb/attribute/TextTest.java b/janusgraph-test/src/test/java/org/janusgraph/graphdb/attribute/TextTest.java deleted file mode 100644 index 321ad190103..00000000000 --- a/janusgraph-test/src/test/java/org/janusgraph/graphdb/attribute/TextTest.java +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2017 JanusGraph Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package org.janusgraph.graphdb.attribute; - -import org.janusgraph.core.attribute.Cmp; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.*; - -import static org.janusgraph.core.attribute.Text.*; - -/** - * @author Matthias Broecheler (me@matthiasb.com) - */ - -public class TextTest { - - @Test - public void testContains() { - String text = "This world is full of 1funny surprises! 
A Full Yes"; - //Contains - assertTrue(CONTAINS.test(text, "world")); - assertTrue(CONTAINS.test(text, "wOrLD")); - assertFalse(CONTAINS.test(text, "worl")); - - assertTrue(CONTAINS.test(text, "this")); - assertTrue(CONTAINS.test(text, "yes")); - assertFalse(CONTAINS.test(text, "funny")); - - assertFalse(CONTAINS.test(text, "a")); - assertFalse(CONTAINS.test(text, "A")); - - assertTrue(CONTAINS.test(text, "surprises")); - assertTrue(CONTAINS.test(text, "FULL")); - - assertTrue(CONTAINS.test(text, "full surprises")); - assertTrue(CONTAINS.test(text, "full,surprises,world")); - assertFalse(CONTAINS.test(text, "full bunny")); - assertTrue(CONTAINS.test(text, "a world")); - - - - //Prefix - assertTrue(CONTAINS_PREFIX.test(text, "worl")); - assertTrue(CONTAINS_PREFIX.test(text, "wORl")); - assertTrue(CONTAINS_PREFIX.test(text, "ye")); - assertTrue(CONTAINS_PREFIX.test(text, "Y")); - - assertFalse(CONTAINS_PREFIX.test(text, "fo")); - assertFalse(CONTAINS_PREFIX.test(text, "of 1f")); - assertFalse(CONTAINS_PREFIX.test(text, "ses")); - - - //Regex - assertTrue(CONTAINS_REGEX.test(text, "fu[l]+")); - assertTrue(CONTAINS_REGEX.test(text, "wor[ld]{1,2}")); - assertTrue(CONTAINS_REGEX.test(text, "\\dfu\\w*")); - - assertFalse(CONTAINS_REGEX.test(text, "fo")); - assertFalse(CONTAINS_REGEX.test(text, "wor[l]+")); - assertFalse(CONTAINS_REGEX.test(text, "wor[ld]{3,5}")); - - - String name = "fully funny"; - //Cmp - assertTrue(Cmp.EQUAL.test(name, name)); - assertFalse(Cmp.NOT_EQUAL.test(name, name)); - assertFalse(Cmp.EQUAL.test("fullly funny", name)); - assertTrue(Cmp.NOT_EQUAL.test("fullly funny", name)); - - //Prefix - assertTrue(PREFIX.test(name, "fully")); - assertTrue(PREFIX.test(name, "ful")); - assertTrue(PREFIX.test(name, "fully fu")); - assertFalse(PREFIX.test(name, "fun")); - - //REGEX - assertTrue(REGEX.test(name, "(fu[ln]*y) (fu[ln]*y)")); - assertFalse(REGEX.test(name, "(fu[l]*y) (fu[l]*y)")); - assertTrue(REGEX.test(name, "(fu[l]*y) .*")); - - //FUZZY - String 
shortValue = "ah"; - assertTrue(FUZZY.test(shortValue,"ah")); - assertFalse(FUZZY.test(shortValue,"ai")); - String mediumValue = "hop"; - assertTrue(FUZZY.test(mediumValue,"hop")); - assertTrue(FUZZY.test(mediumValue,"hopp")); - assertTrue(FUZZY.test(mediumValue,"hap")); - assertFalse(FUZZY.test(mediumValue,"ha")); - assertFalse(FUZZY.test(mediumValue,"hoopp")); - String longValue = "surprises"; - assertTrue(FUZZY.test(longValue,"surprises")); - assertTrue(FUZZY.test(longValue,"surpprises")); - assertTrue(FUZZY.test(longValue,"sutprises")); - assertTrue(FUZZY.test(longValue,"surprise")); - assertFalse(FUZZY.test(longValue,"surppirsses")); - - //CONTAINS_FUZZY - //Short - assertTrue(CONTAINS_FUZZY.test(text,"is")); - assertFalse(CONTAINS_FUZZY.test(text,"si")); - //Medium - assertTrue(CONTAINS_FUZZY.test(text,"full")); - assertTrue(CONTAINS_FUZZY.test(text,"fully")); - assertTrue(CONTAINS_FUZZY.test(text,"ful")); - assertTrue(CONTAINS_FUZZY.test(text,"fill")); - assertFalse(CONTAINS_FUZZY.test(text,"fu")); - assertFalse(CONTAINS_FUZZY.test(text,"fullest")); - //Long - assertTrue(CONTAINS_FUZZY.test(text,"surprises")); - assertTrue(CONTAINS_FUZZY.test(text,"Surpprises")); - assertTrue(CONTAINS_FUZZY.test(text,"Sutrises")); - assertTrue(CONTAINS_FUZZY.test(text,"surprise")); - assertFalse(CONTAINS_FUZZY.test(text,"surppirsses")); - } -} diff --git a/janusgraph-test/src/test/java/org/janusgraph/graphdb/predicate/ConnectiveJanusPredicateTest.java b/janusgraph-test/src/test/java/org/janusgraph/graphdb/predicate/ConnectiveJanusPredicateTest.java index 85bd315e187..61bce9353b7 100644 --- a/janusgraph-test/src/test/java/org/janusgraph/graphdb/predicate/ConnectiveJanusPredicateTest.java +++ b/janusgraph-test/src/test/java/org/janusgraph/graphdb/predicate/ConnectiveJanusPredicateTest.java @@ -85,11 +85,6 @@ public void testHasNegationOk() { assertTrue(getPredicate(Arrays.asList(Geo.INTERSECT, Cmp.EQUAL)).hasNegation()); } - @Test - public void testHasNegationKo() { - 
assertFalse(getPredicate(Arrays.asList(Text.CONTAINS, Cmp.EQUAL)).hasNegation()); - } - @Test public void testNegate() { assertEquals(getNegatePredicate(Arrays.asList(Geo.DISJOINT, Cmp.NOT_EQUAL)), getPredicate(Arrays.asList(Geo.INTERSECT, Cmp.EQUAL)).negate()); diff --git a/janusgraph-test/src/test/java/org/janusgraph/graphdb/query/QueryTest.java b/janusgraph-test/src/test/java/org/janusgraph/graphdb/query/QueryTest.java index df01aaca826..a1947c28b30 100644 --- a/janusgraph-test/src/test/java/org/janusgraph/graphdb/query/QueryTest.java +++ b/janusgraph-test/src/test/java/org/janusgraph/graphdb/query/QueryTest.java @@ -351,5 +351,69 @@ public void testFuzzyMatchWithoutIndex() { assertEquals(0, graph.traversal().V().has("name", Text.textContainsFuzzy("valuable")).count().next()); } + @Test + public void testTextContainsPhraseWithoutIndex() { + JanusGraphManagement mgmt = graph.openManagement(); + PropertyKey name = mgmt.makePropertyKey("name").dataType(String.class).make(); + mgmt.commit(); + + tx.addVertex().property("name", "some value"); + tx.addVertex().property("name", "other value"); + tx.commit(); + + assertEquals(2, graph.traversal().V().has("name", Text.textContainsPhrase("value")).count().next()); + assertEquals(1, graph.traversal().V().has("name", Text.textContainsPhrase("other value")).count().next()); + assertEquals(0, graph.traversal().V().has("name", Text.textContainsPhrase("final value")).count().next()); + } + + @Test + public void testTextNegatedWithoutIndex() { + JanusGraphManagement mgmt = graph.openManagement(); + PropertyKey name = mgmt.makePropertyKey("name").dataType(String.class).make(); + mgmt.commit(); + + tx.addVertex().property("name", "some value"); + tx.addVertex().property("name", "other value"); + tx.commit(); + + // Text.textNotFuzzy + assertEquals(1, graph.traversal().V().has("name", Text.textNotFuzzy("other values")).count().next()); + assertEquals(2, graph.traversal().V().has("name", Text.textNotFuzzy("final 
values")).count().next()); + + // Text.textNotRegex + assertEquals(0, graph.traversal().V().has("name", Text.textNotRegex(".*value")).count().next()); + assertEquals(1, graph.traversal().V().has("name", Text.textNotRegex("other.*")).count().next()); + assertEquals(2, graph.traversal().V().has("name", Text.textNotRegex("final.*")).count().next()); + + // Text.textNotPrefix + assertEquals(1, graph.traversal().V().has("name", Text.textNotPrefix("other")).count().next()); + assertEquals(2, graph.traversal().V().has("name", Text.textNotPrefix("final")).count().next()); + + // Text.textNotContains + assertEquals(0, graph.traversal().V().has("name", Text.textNotContains("value")).count().next()); + assertEquals(1, graph.traversal().V().has("name", Text.textNotContains("other")).count().next()); + assertEquals(2, graph.traversal().V().has("name", Text.textNotContains("final")).count().next()); + + // Text.textNotContainsFuzzy + assertEquals(0, graph.traversal().V().has("name", Text.textNotContainsFuzzy("values")).count().next()); + assertEquals(1, graph.traversal().V().has("name", Text.textNotContainsFuzzy("others")).count().next()); + assertEquals(2, graph.traversal().V().has("name", Text.textNotContainsFuzzy("final")).count().next()); + + // Text.textNotContainsRegex + assertEquals(0, graph.traversal().V().has("name", Text.textNotContainsRegex("val.*")).count().next()); + assertEquals(1, graph.traversal().V().has("name", Text.textNotContainsRegex("oth.*")).count().next()); + assertEquals(2, graph.traversal().V().has("name", Text.textNotContainsRegex("fin.*")).count().next()); + + // Text.textNotContainsPrefix + assertEquals(0, graph.traversal().V().has("name", Text.textNotContainsPrefix("val")).count().next()); + assertEquals(1, graph.traversal().V().has("name", Text.textNotContainsPrefix("oth")).count().next()); + assertEquals(2, graph.traversal().V().has("name", Text.textNotContainsPrefix("final")).count().next()); + + // Text.textNotContainsPhrase + assertEquals(0, 
graph.traversal().V().has("name", Text.textNotContainsPhrase("value")).count().next()); + assertEquals(1, graph.traversal().V().has("name", Text.textNotContainsPhrase("other value")).count().next()); + assertEquals(2, graph.traversal().V().has("name", Text.textNotContainsPhrase("final value")).count().next()); + } + }