Skip to content

Commit

Permalink
Implement adjust_offsets on word delimiter graph token filter (#3934)
Browse files Browse the repository at this point in the history
This commit implements the adjust_offsets setting on the word delimiter graph token filter.

(cherry picked from commit e410ac9)
  • Loading branch information
codebrain authored and Stuart Cam committed Jul 19, 2019
1 parent 626d2d8 commit 041ea8a
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@ namespace Nest
/// </summary>
public interface IWordDelimiterGraphTokenFilter : ITokenFilter
{
/// <summary>
/// By default, the filter tries to output subtokens with adjusted offsets to reflect their actual position
/// in the token stream. However, when used in combination with other filters that alter the length or
/// starting position of tokens without changing their offsets (e.g. <see cref="TrimTokenFilter"/>), this can
/// cause tokens with illegal offsets to be emitted. Setting <see cref="AdjustOffsets"/> to <c>false</c> will
/// stop <see cref="WordDelimiterGraphTokenFilter"/> from adjusting these internal offsets.
/// </summary>
/// <remarks>Declared with the data member name <c>adjust_offsets</c>.</remarks>
[DataMember(Name ="adjust_offsets")]
// NOTE(review): NullableStringBooleanFormatter presumably tolerates string-encoded booleans ("true"/"false") — confirm against the formatter.
[JsonFormatter(typeof(NullableStringBooleanFormatter))]
bool? AdjustOffsets { get; set; }

/// <summary>
/// If true causes all subword parts to be catenated: "wi-fi-4000" ⇒ "wifi4000". Defaults to false.
/// </summary>
Expand Down Expand Up @@ -104,6 +111,9 @@ public class WordDelimiterGraphTokenFilter : TokenFilterBase, IWordDelimiterGrap
{
public WordDelimiterGraphTokenFilter() : base("word_delimiter_graph") { }

/// <inheritdoc />
public bool? AdjustOffsets { get; set; }

/// <inheritdoc />
public bool? CatenateAll { get; set; }

Expand Down Expand Up @@ -149,6 +159,7 @@ public class WordDelimiterGraphTokenFilterDescriptor
: TokenFilterDescriptorBase<WordDelimiterGraphTokenFilterDescriptor, IWordDelimiterGraphTokenFilter>, IWordDelimiterGraphTokenFilter
{
protected override string Type => "word_delimiter_graph";
bool? IWordDelimiterGraphTokenFilter.AdjustOffsets { get; set; }
bool? IWordDelimiterGraphTokenFilter.CatenateAll { get; set; }
bool? IWordDelimiterGraphTokenFilter.CatenateNumbers { get; set; }
bool? IWordDelimiterGraphTokenFilter.CatenateWords { get; set; }
Expand Down Expand Up @@ -179,6 +190,9 @@ public WordDelimiterGraphTokenFilterDescriptor GenerateNumberParts(bool? generat
public WordDelimiterGraphTokenFilterDescriptor CatenateNumbers(bool? catenateNumbers = true) => Assign(catenateNumbers, (filter, value) => filter.CatenateNumbers = value);

/// <inheritdoc cref="IWordDelimiterGraphTokenFilter.AdjustOffsets" />
public WordDelimiterGraphTokenFilterDescriptor AdjustOffsets(bool? adjustOffsets = true) =>
	Assign(adjustOffsets, (filter, value) => filter.AdjustOffsets = value);

/// <inheritdoc cref="IWordDelimiterGraphTokenFilter.CatenateAll" />
public WordDelimiterGraphTokenFilterDescriptor CatenateAll(bool? catenateAll = true) =>
	Assign(catenateAll, (filter, value) => filter.CatenateAll = value);

Expand Down
3 changes: 3 additions & 0 deletions src/Tests/Tests/Analysis/TokenFilters/TokenFilterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,7 @@ public class WordDelimiterGraphTests : TokenFilterAssertionBase<WordDelimiterGra
{
public override FuncTokenFilters Fluent => (n, tf) => tf
.WordDelimiterGraph(n, t => t
.AdjustOffsets()
.CatenateAll()
.CatenateNumbers()
.CatenateWords()
Expand All @@ -848,6 +849,7 @@ public class WordDelimiterGraphTests : TokenFilterAssertionBase<WordDelimiterGra
public override ITokenFilter Initializer =>
new WordDelimiterGraphTokenFilter
{
AdjustOffsets = true,
CatenateAll = true,
CatenateNumbers = true,
CatenateWords = true,
Expand All @@ -863,6 +865,7 @@ public class WordDelimiterGraphTests : TokenFilterAssertionBase<WordDelimiterGra
public override object Json => new
{
type = "word_delimiter_graph",
adjust_offsets = true,
generate_word_parts = true,
generate_number_parts = true,
catenate_words = true,
Expand Down

0 comments on commit 041ea8a

Please sign in to comment.