Skip to content

Commit

Permalink
GH-281 TermQuery with a string.Empty value needs .Verbatim() or it is…
Browse files Browse the repository at this point in the history
… ignored as "conditionless" (#283)

* test: reproduce GH-281 TermQuery with Value of empty string serializes as <null>

Signed-off-by: David Alpert <[email protected]>

* test: demonstrate GH-281 is a feature, not a bug

Further investigation shows that a TermQuery with an empty
string value is expected to be 'conditionless' in this
codebase and the TermQuery implementation includes a
.Verbatim() method to allow the client to serialize
the query clause even though it evaluates to
conditionless

this allows me to create a query like this

    GET /index/_search
    {
      "query": {
        "bool": {
          "must": [
            {"exists": { "field": "last_name"}}
          ],
          "must_not": [
            {"term": {"last_name.keyword": {"value": ""}}}
          ]
        }
      }
    }

using the following syntax

    client.Search<SampleDomainObject>(s => s
      .Query(q => q
        .Bool(b => b
          .Must(m => m.Exists(e => e.Field("last_name")))
          .MustNot(m => m.Term(t => t.Verbatim().Field("last_name.keyword").Value(string.Empty)))
        )
      )
      .Index("index")
      .Source(sfd => null)
    );

thus resolving that GH-281 is not a bug and is working as designed.

Signed-off-by: David Alpert <[email protected]>

* refactor: address PR feedback

GH-281

Signed-off-by: David Alpert <[email protected]>

* docs: update USAGE.md with an example of IsVerbatim/Verbatim()

GH-281

Signed-off-by: David Alpert <[email protected]>

---------

Signed-off-by: David Alpert <[email protected]>
  • Loading branch information
davidalpert authored Aug 14, 2023
1 parent 309111a commit 9e9c503
Show file tree
Hide file tree
Showing 2 changed files with 217 additions and 0 deletions.
43 changes: 43 additions & 0 deletions USER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,49 @@ var request = new SearchRequest

var searchResponse = client.Search<Tweet>(request);
```

#### Searching for presence or absence of exact terms

From the documentation on [Term-level queries](https://opensearch.org/docs/latest/query-dsl/term/):

> Term-level queries search an index for documents that contain an exact search term...
>
> When working with text data, use term-level queries for fields mapped as keyword only.
>
> Term-level queries are not suited for searching analyzed text fields. To return analyzed fields, use a full-text query.

The above search example includes a `TermQuery` matching documents with `user:kimchy`.

Term-level queries with empty or null values, however, are stripped from search requests by default.

To search for documents which contain a non-null but empty field value (i.e. an empty string), include an `IsVerbatim` property or a `.Verbatim()` clause like this:

```csharp
var request = new SearchRequest
{
From = 0,
Size = 10,
Query = new TermQuery { Field = "user", Value = "", IsVerbatim = true },
};

var searchResponse = client.Search<Tweet>(request);
```

In Fluent syntax this might look like:

```csharp
var result = await OpenSearchClient.SearchAsync<Tweet>(s => s
.Index(index)
.From(0)
.Size(10)
.Query(q => q
.Bool(b => b
.Must(m => m.Term(t => t.Verbatim().Field(f => f.User).Value(string.Empty)))
)
)
);
```

### Falling back to OpenSearch.Net

OpenSearch.Client also includes and exposes the low-level [OpenSearch.Net](https://github.com/opensearch-project/opensearch-net/tree/main/src/OpenSearch.Net) client that you can fall back to in case anything is missing:
Expand Down
174 changes: 174 additions & 0 deletions tests/Tests.Reproduce/GitHubIssue281.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

using System;
using System.Linq;
using System.Text;
using System.Text.Json.Serialization;
using OpenSearch.OpenSearch.Xunit.XunitPlumbing;
using OpenSearch.Net;
using FluentAssertions;
using OpenSearch.Client;
using Xunit;

namespace Tests.Reproduce
{
/// <summary>
/// Tests documenting GitHub issue 281: a <c>TermQuery</c> whose value is an empty string
/// is treated as "conditionless" and dropped from the request unless the query is marked
/// with <c>.Verbatim()</c> / <c>IsVerbatim</c>.
/// </summary>
public class GithubIssue281
{
    /// <summary>Minimal document type used to build the queries under test.</summary>
    public class SampleDomainObject
    {
        [JsonPropertyName("first_name")]
        public string FirstName { get; set; }

        [JsonPropertyName("last_name")]
        public string LastName { get; set; }
    }

    /// <summary>
    /// Builds a search request whose bool query has a fixed <c>must: exists(last_name)</c>
    /// clause plus the supplied <c>must_not</c> clause, and returns its query container.
    /// </summary>
    /// <param name="mustNot">Selector producing the single <c>must_not</c> clause.</param>
    /// <returns>The request's query, viewed through <see cref="IQueryContainer"/>.</returns>
    private static IQueryContainer BuildBoolQuery(
        Func<QueryContainerDescriptor<SampleDomainObject>, QueryContainer> mustNot)
    {
        Func<SearchDescriptor<SampleDomainObject>, ISearchRequest> selector = s => s
            .Query(q => q
                .Bool(b => b
                    .Must(m => m.Exists(e => e.Field("last_name")))
                    .MustNot(mustNot)
                )
            )
            .Index("index")
            .Source(sfd => null);

        var searchRequest = selector.Invoke(new SearchDescriptor<SampleDomainObject>());
        return searchRequest.Query as IQueryContainer;
    }

    /// <summary>
    /// Asserts that both the <c>must</c> and <c>must_not</c> collections of the bool query
    /// are non-empty and that their first entries are not null.
    /// </summary>
    /// <param name="query">The query container holding the bool query to inspect.</param>
    private static void AssertMustAndMustNotArePopulated(IQueryContainer query)
    {
        query.Bool.Must.Should().NotBeEmpty();
        query.Bool.Must.First().Should().NotBeNull("Must");

        query.Bool.MustNot.Should().NotBeEmpty();
        query.Bool.MustNot.First().Should().NotBeNull("MustNot");
    }

    /// <summary>
    /// Sends the search through an in-memory connection and checks that the verbatim
    /// empty-string term clause is present in the serialized request body.
    /// </summary>
    // NOTE: the "GithubIssu281" spelling is a typo; the name is kept so the test's identity is unchanged.
    [U]
    public void GithubIssu281_MustNotWithTermQueryAndVerbatimEmptyValueShouldBeInRequestBody()
    {
        // DisableDirectStreaming is applied so the request bytes can be read back from ApiCall below.
        var connectionSettings = new ConnectionSettings(new InMemoryConnection()).DisableDirectStreaming();
        var client = new OpenSearchClient(connectionSettings);

        var action = () =>
            client.Search<SampleDomainObject>(s => s
                .Query(q => q
                    .Bool(b => b
                        .Must(m => m.Exists(e => e.Field("last_name")))
                        .MustNot(m => m.Term(t => t.Verbatim().Field("last_name.keyword").Value(string.Empty)))
                    )
                )
                .Index("index")
                .Source(sfd => null)
            );

        var response = action.Should().NotThrow().Subject;

        var json = Encoding.UTF8.GetString(response.ApiCall.RequestBodyInBytes);
        json.Should()
            .Be(
                @"{""query"":{""bool"":{""must"":[{""exists"":{""field"":""last_name""}}],""must_not"":[{""term"":{""last_name.keyword"":{""value"":""""}}}]}}}");
    }

    /// <summary>
    /// A verbatim term query with an empty value must survive as a non-null
    /// <c>must_not</c> clause; this passes only because <c>.Verbatim()</c> is used.
    /// </summary>
    [U]
    public void GithubIssue281_MustNotTermQueryAndVerbatimEmptyValueShouldBeRegisteredAsNonNull()
    {
        var query = BuildBoolQuery(
            m => m.Term(t => t.Verbatim().Field("last_name.keyword").Value(string.Empty)));

        AssertMustAndMustNotArePopulated(query);
    }

    /// <summary>
    /// A term query with a non-empty value must survive as a non-null <c>must_not</c>
    /// clause without needing <c>.Verbatim()</c>.
    /// </summary>
    [U]
    public void GithubIssue281_MustNotTermQueryAndNonVerbatimNonEmptyValueShouldBeRegisteredAsNonNull()
    {
        // Deliberately no .Verbatim() here: the test name promises the non-verbatim path.
        var query = BuildBoolQuery(
            m => m.Term(t => t.Field("last_name.keyword").Value("mal")));

        AssertMustAndMustNotArePopulated(query);
    }

    /// <summary>
    /// Control case: an <c>exists</c> clause under <c>must_not</c> is registered as non-null.
    /// </summary>
    [U]
    public void GithubIssue281_MustNotExistsClauseShouldNotBeNull()
    {
        var query = BuildBoolQuery(m => m.Exists(e => e.Field("last_name")));

        AssertMustAndMustNotArePopulated(query);
    }

    /// <summary>
    /// A term query with a non-empty value yields a non-null query container.
    /// </summary>
    [U]
    public void GithubIssue281_TermQueryWithNonEmptyValueSerializesToNonNullResult()
    {
        Func<QueryContainerDescriptor<SampleDomainObject>, QueryContainer> termQuery =
            m => m.Term(t => t.Field("last_name.keyword").Value("doe"));

        var result = termQuery.Invoke(new QueryContainerDescriptor<SampleDomainObject>());

        result.Should().NotBeNull();
    }

    /// <summary>
    /// A verbatim term query with an empty value yields a non-null query container.
    /// </summary>
    [U]
    public void GithubIssue281_TermQueryWithVerbatimEmptyValueSerializesToNonNullResult()
    {
        Func<QueryContainerDescriptor<SampleDomainObject>, QueryContainer> termQuery =
            m => m.Term(t => t.Verbatim().Field("last_name.keyword").Value(string.Empty));

        var result = termQuery.Invoke(new QueryContainerDescriptor<SampleDomainObject>());

        result.Should().NotBeNull();
    }

    /// <summary>
    /// Checks a local stand-in for the term query's conditionless rule against null,
    /// empty and non-empty values.
    /// </summary>
    /// <param name="scenario">Human-readable label used in the failure message.</param>
    /// <param name="val">Value assigned to the term query.</param>
    /// <param name="expected">Whether the value is expected to count as conditionless.</param>
    [TU]
    [InlineData("null", null, true)]
    [InlineData("non-empty string", "doe", false)]
    [InlineData("empty string", "", true)]
    public void GithubIssue281_TermQueryIsConditionless(string scenario, string val, bool expected)
    {
        // Local simulation only: it looks at the value and does not call the library's own check.
        // NOTE(review): presumably mirrors the value part of the real conditionless rule - confirm.
        bool SimulateIsConditionless(ITermQuery q)
        {
            return q.Value == null || string.IsNullOrEmpty(q.Value.ToString());
        }

        var termQuery = new TermQuery { Value = val };

        var result = SimulateIsConditionless(termQuery);

        result.Should().Be(expected, $"{scenario} should be conditionless: {expected}");
    }
}
}

0 comments on commit 9e9c503

Please sign in to comment.