From c94be8f517fcd5b4aaa7f46bcee0c6f308079110 Mon Sep 17 00:00:00 2001 From: Chandler Prall Date: Tue, 8 Jan 2019 09:52:34 -0700 Subject: [PATCH 1/4] wip --- .../search_bar/query/default_syntax.js | 14 +++++++++++- .../search_bar/query/default_syntax.test.js | 22 +++++++++++++++++++ yarn.lock | 5 +++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/components/search_bar/query/default_syntax.js b/src/components/search_bar/query/default_syntax.js index 3bced52e84d..0ad0765e05f 100644 --- a/src/components/search_bar/query/default_syntax.js +++ b/src/components/search_bar/query/default_syntax.js @@ -127,8 +127,11 @@ word wordChar = alnum - / [-_*:] + / [-_:*] / escapedChar + / [^ \\t\\n\\r] { + return testIsNonAsciiWordCharacter(text()) ? text() : ctx.error(\`\${text()} is not a supported character\`); + } escapedChar = "\\\\" reservedChar @@ -188,6 +191,14 @@ const Exp = { boolean: (expression, location) => ({ type: 'boolean', expression, location }) }; +// this isn't _strictly_ correct +// for our purposes, a non-ascii word character is considered to +// be anything above `Latin-1 Punctuation & Symbols`, which ends at U+00BF +// this allows any non-ascii character, including the full set of unicode characters +// even those in the astral plane (U+010000 → U+10FFFF) as those will be seen as +// their surrogate pairs which are of the format /[\uD800-\uDBFF][\uDC00-\uDFFF]/ +const testIsNonAsciiWordCharacter = char => char.charCodeAt(0) >= 0x00C0; + const validateFlag = (flag, location, ctx) => { if (ctx.schema && ctx.schema.strict) { if (ctx.schema.flags && ctx.schema.flags.includes(flag)) { @@ -310,6 +321,7 @@ export const defaultSyntax = Object.freeze({ parseDate, resolveFieldValue, validateFlag, + testIsNonAsciiWordCharacter, schema: { strict: false, flags: [], fields: {}, ...schema } }); return AST.create(clauses); diff --git a/src/components/search_bar/query/default_syntax.test.js b/src/components/search_bar/query/default_syntax.test.js index 9805d5f81a2..8b2a59ea6fe 100644 --- a/src/components/search_bar/query/default_syntax.test.js +++ b/src/components/search_bar/query/default_syntax.test.js @@ -68,6 +68,28 @@ describe('defaultSyntax', () => { expect(clause.value).toBe('dash-3'); }); + test('unicode field and term values', () => { + const query = 'name:👸Queen_Elizabeth 🤴King_Henry'; + const ast = defaultSyntax.parse(query); + + expect(ast).toBeDefined(); + expect(ast.clauses).toBeDefined(); + expect(ast.clauses).toHaveLength(2); + + let clause = ast.getSimpleFieldClause('name', '👸Queen_Elizabeth'); + expect(clause).toBeDefined(); + expect(AST.Field.isInstance(clause)).toBe(true); + expect(AST.Match.isMustClause(clause)).toBe(true); + expect(clause.field).toBe('name'); + expect(clause.value).toBe('👸Queen_Elizabeth'); + + clause = ast.getTermClause('🤴King_Henry'); + expect(clause).toBeDefined(); + expect(AST.Term.isInstance(clause)).toBe(true); + expect(AST.Match.isMustClause(clause)).toBe(true); + expect(clause.value).toBe('🤴King_Henry'); + }); + test('escaped chars as default clauses', () => { const query = '-\\: \\\\'; const ast = defaultSyntax.parse(query); diff --git a/yarn.lock b/yarn.lock index ce3c12490e3..df5c466b25b 100644 --- a/yarn.lock +++ b/yarn.lock @@ -13720,6 +13720,11 @@ underscore@~1.4.4: resolved "https://registry.yarnpkg.com/underscore/-/underscore-1.4.4.tgz#61a6a32010622afa07963bf325203cf12239d604" integrity sha1-YaajIBBiKvoHljvzJSA88SI51gQ= +unicode-11.0.0@^0.7.8: + version "0.7.8" + resolved "https://registry.yarnpkg.com/unicode-11.0.0/-/unicode-11.0.0-0.7.8.tgz#5eecdd6c2802fdd6b260661c57921d0294aeea98" + integrity sha512-O/7kwPxe1ZOiwbLr4/OleBnUDwDRldjubW5SqcQvz1b2EMPp5mQotOdf4L1z/5CNzSDVmWM9cFmseOI6L8vb6g== + unicode-canonical-property-names-ecmascript@^1.0.4: version "1.0.4" resolved "https://registry.yarnpkg.com/unicode-canonical-property-names-ecmascript/-/unicode-canonical-property-names-ecmascript-1.0.4.tgz#2619800c4c825800efdd8343af7dd9933cbe2818" From 9dd2a0a1275236d9e6a115a104806e63bb2c28b4 Mon Sep 17 00:00:00 2001 From: Chandler Prall Date: Tue, 8 Jan 2019 13:14:16 -0700 Subject: [PATCH 2/4] Support for specifically extended character glyphs range --- .../search_bar/query/default_syntax.js | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/components/search_bar/query/default_syntax.js b/src/components/search_bar/query/default_syntax.js index 0ad0765e05f..a22b5873b4e 100644 --- a/src/components/search_bar/query/default_syntax.js +++ b/src/components/search_bar/query/default_syntax.js @@ -129,9 +129,16 @@ wordChar = alnum / [-_:*] / escapedChar - / [^ \\t\\n\\r] { - return testIsNonAsciiWordCharacter(text()) ? text() : ctx.error(\`\${text()} is not a supported character\`); - } + / extendedGlyph + +// This isn't _strictly_ correct: +// for our purposes, a non-ascii word character is considered to +// be anything above \`Latin-1 Punctuation & Symbols\`, which ends at U+00BF +// This allows any non-ascii character, including the full set of unicode characters +// even those in the supplementary planes (U+010000 → U+10FFFF) as those will be seen individually +// in their surrogate pairs which are of the format /[\uD800-\uDBFF][\uDC00-\uDFFF]/ +extendedGlyph + = [\u00C0-\uFFFF] escapedChar = "\\\\" reservedChar @@ -191,14 +198,6 @@ const Exp = { boolean: (expression, location) => ({ type: 'boolean', expression, location }) }; -// this isn't _strictly_ correct -// for our purposes, a non-ascii word character is considered to -// be anything above `Latin-1 Punctuation & Symbols`, which ends at U+00BF -// this allows any non-ascii character, including the full set of unicode characters -// even those in the astral plane (U+010000 → U+10FFFF) as those will be seen as -// their surrogate pairs which are of the format /[\uD800-\uDBFF][\uDC00-\uDFFF]/ -const testIsNonAsciiWordCharacter = char => char.charCodeAt(0) >= 0x00C0; - const validateFlag = (flag, location, ctx) => { if (ctx.schema && ctx.schema.strict) { if (ctx.schema.flags && ctx.schema.flags.includes(flag)) { @@ -321,7 +320,6 @@ export const defaultSyntax = Object.freeze({ parseDate, resolveFieldValue, validateFlag, - testIsNonAsciiWordCharacter, schema: { strict: false, flags: [], fields: {}, ...schema } }); return AST.create(clauses); From 8aea490aa0a1b4109518169fd23a310348c4965a Mon Sep 17 00:00:00 2001 From: Chandler Prall Date: Tue, 8 Jan 2019 13:50:47 -0700 Subject: [PATCH 3/4] changelog --- CHANGELOG.md | 4 ++++ src/components/search_bar/query/default_syntax.js | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c236195f7b..bba7918903b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ - Added `onBlur` prop to `EuiComboBox` ([#1400](https://github.com/elastic/eui/pull/1400)) - Added `initialFocus` prop typedefs to `EuiModal` and `EuiPopover` ([#1410](https://github.com/elastic/eui/pull/1410)) +**Bug fixes** + +- Support extended characters (e.g. non-latin, unicode) in `EuiSearchBar` and `EuiQuery` ([#1415](https://github.com/elastic/eui/pull/1415)) + ## [`6.2.0`](https://github.com/elastic/eui/tree/v6.2.0) - Added `logoCodesandbox` and updated `apmApp` icons ([#1407](https://github.com/elastic/eui/pull/1407)) diff --git a/src/components/search_bar/query/default_syntax.js b/src/components/search_bar/query/default_syntax.js index a22b5873b4e..d2496088a2c 100644 --- a/src/components/search_bar/query/default_syntax.js +++ b/src/components/search_bar/query/default_syntax.js @@ -127,7 +127,7 @@ word wordChar = alnum - / [-_:*] + / [-_*:] / escapedChar / extendedGlyph From 2c699343f90c4e4520bf30c8885a7876a8fbe5a6 Mon Sep 17 00:00:00 2001 From: Chandler Prall Date: Tue, 8 Jan 2019 14:00:19 -0700 Subject: [PATCH 4/4] don't modify yarn.lock --- yarn.lock | 5 ----- 1 file changed, 5 deletions(-) diff --git a/yarn.lock b/yarn.lock index df5c466b25b..ce3c12490e3 100644 --- a/yarn.lock +++ b/yarn.lock @@ -13720,11 +13720,6 @@ underscore@~1.4.4: resolved "https://registry.yarnpkg.com/underscore/-/underscore-1.4.4.tgz#61a6a32010622afa07963bf325203cf12239d604" integrity sha1-YaajIBBiKvoHljvzJSA88SI51gQ= -unicode-11.0.0@^0.7.8: - version "0.7.8" - resolved "https://registry.yarnpkg.com/unicode-11.0.0/-/unicode-11.0.0-0.7.8.tgz#5eecdd6c2802fdd6b260661c57921d0294aeea98" - integrity sha512-O/7kwPxe1ZOiwbLr4/OleBnUDwDRldjubW5SqcQvz1b2EMPp5mQotOdf4L1z/5CNzSDVmWM9cFmseOI6L8vb6g== - unicode-canonical-property-names-ecmascript@^1.0.4: version "1.0.4" resolved "https://registry.yarnpkg.com/unicode-canonical-property-names-ecmascript/-/unicode-canonical-property-names-ecmascript-1.0.4.tgz#2619800c4c825800efdd8343af7dd9933cbe2818"