From ddcfb766a746db5bf7eb720a8af0a2c4c2d04e65 Mon Sep 17 00:00:00 2001 From: smori Date: Sat, 16 Sep 2023 21:08:43 +0900 Subject: [PATCH 1/3] matcher for Japanese characters --- src/components/SearchBoxFlexSearch.vue | 3 + src/components/excerpt.js | 25 ++++++- src/components/matcher.js | 95 ++++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 src/components/matcher.js diff --git a/src/components/SearchBoxFlexSearch.vue b/src/components/SearchBoxFlexSearch.vue index 013bee1..db5b058 100644 --- a/src/components/SearchBoxFlexSearch.vue +++ b/src/components/SearchBoxFlexSearch.vue @@ -51,6 +51,7 @@ import { Document } from 'flexsearch' import data from '@dynamic/vuepress-plugin-flexsearch/data' import excerpt from './excerpt' +import matcher from './matcher'; import ngram from '../tokenizer/ngram' /* global FLEX_SEARCH_HOTKEYS */ @@ -130,6 +131,7 @@ export default { aroundLength: FLEX_SEARCH_EXCERPT_AROUND_LENGTH, headText: FLEX_SEARCH_EXCERPT_HEAD_TEXT, tailText: FLEX_SEARCH_EXCERPT_TAIL_TEXT, + matcher: matcher, }), }; }); @@ -159,6 +161,7 @@ export default { setUpFlexSearchDocument () { for (const locale in data) { const doc = new Document({ + matcher: matcher, tokenize: 'forward', id: 'key', index: [ diff --git a/src/components/excerpt.js b/src/components/excerpt.js index b6c6945..0107a1f 100644 --- a/src/components/excerpt.js +++ b/src/components/excerpt.js @@ -4,8 +4,9 @@ const TextProcessing = require('./text-processing'); /** * @typedef {Object} ExcerptOption * @property {number} [aroundLength] - * @property {string} [head] - * @property {string} [tail] + * @property {string} [headText] + * @property {string} [tailText] + * @property {Object.} [matcher] */ /** @@ -19,10 +20,11 @@ const create = (content, query, option) => { aroundLength = 50, headText = '... ', tailText = ' ...', + matcher = {}, } = option || {}; const contentLowerCase = content.toLowerCase(); - const queryLowerCase = query.toLowerCase(); + const queryLowerCase = addMatcher(query, matcher).toLowerCase(); const textProcessing = new TextProcessing(); @@ -46,6 +48,23 @@ const create = (content, query, option) => { return content.slice(0, aroundLength) + (content.length > aroundLength ? tailText : ''); }; +/** + * @param {string} query + * @param {Object.} matcher + * @return {string} + */ +const addMatcher = (query, matcher) => { + let converted1 = query; + let converted2 = query; + + Object.keys(matcher).forEach((key) => { + converted1 = converted1.replaceAll(key, matcher[key]); + converted2 = converted2.replaceAll(matcher[key], key); + }) + + return [query, converted1, converted2].join(' '); +}; + /** * @param {string} excerpt * @param {string[]} queries diff --git a/src/components/matcher.js b/src/components/matcher.js new file mode 100644 index 0000000..d2ac349 --- /dev/null +++ b/src/components/matcher.js @@ -0,0 +1,95 @@ +module.exports = { + '1': '1', + '2': '2', + '3': '3', + '4': '4', + '5': '5', + '6': '6', + '7': '7', + '8': '8', + '9': '9', + '0': '0', + + 'あ': 'ア', + 'い': 'イ', + 'う': 'ウ', + 'え': 'エ', + 'お': 'オ', + 'か': 'カ', + 'き': 'キ', + 'く': 'ク', + 'け': 'ケ', + 'こ': 'コ', + 'さ': 'サ', + 'し': 'シ', + 'す': 'ス', + 'せ': 'セ', + 'そ': 'ソ', + 'た': 'タ', + 'ち': 'チ', + 'つ': 'ツ', + 'て': 'テ', + 'と': 'ト', + 'な': 'ナ', + 'に': 'ニ', + 'ぬ': 'ヌ', + 'ね': 'ネ', + 'の': 'ノ', + 'は': 'ハ', + 'ひ': 'ヒ', + 'ふ': 'フ', + 'へ': 'ヘ', + 'ほ': 'ホ', + 'ま': 'マ', + 'み': 'ミ', + 'む': 'ム', + 'め': 'メ', + 'も': 'モ', + 'や': 'ヤ', + 'ゆ': 'ユ', + 'よ': 'ヨ', + 'ら': 'ラ', + 'り': 'リ', + 'る': 'ル', + 'れ': 'レ', + 'ろ': 'ロ', + 'わ': 'ワ', + 'を': 'ヲ', + 'ん': 'ン', + + 'が': 'ガ', + 'ぎ': 'ギ', + 'ぐ': 'グ', + 'げ': 'ゲ', + 'ご': 'ゴ', + 'ざ': 'ザ', + 'じ': 'ジ', + 'ず': 'ズ', + 'ぜ': 'ゼ', + 'ぞ': 'ゾ', + 'だ': 'ダ', + 'ぢ': 'ヂ', + 'づ': 'ヅ', + 'で': 'デ', + 'ど': 'ド', + 'ば': 'バ', + 'び': 'ビ', + 'ぶ': 'ブ', + 'べ': 'ベ', + 'ぼ': 'ボ', + + 'ぱ': 'パ', + 'ぴ': 'ピ', + 'ぷ': 'プ', + 'ぺ': 'ペ', + 'ぽ': 'ポ', + + 'ぁ': 'ァ', + 'ぃ': 'ィ', + 'ぅ': 'ゥ', + 'ぇ': 'ェ', + 'ぉ': 'ォ', + 'ゃ': 'ャ', + 'ゅ': 'ュ', + 'ょ': 'ョ', +}; From f6c59c73a5b7480822585c06096a85609704703a Mon Sep 17 00:00:00 2001 From: smori Date: Sun, 17 Sep 2023 16:28:59 +0900 Subject: [PATCH 2/3] require matcher.js in excerpt.js --- src/components/SearchBoxFlexSearch.vue | 1 - src/components/excerpt.js | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/components/SearchBoxFlexSearch.vue b/src/components/SearchBoxFlexSearch.vue index db5b058..dceaa08 100644 --- a/src/components/SearchBoxFlexSearch.vue +++ b/src/components/SearchBoxFlexSearch.vue @@ -131,7 +131,6 @@ export default { aroundLength: FLEX_SEARCH_EXCERPT_AROUND_LENGTH, headText: FLEX_SEARCH_EXCERPT_HEAD_TEXT, tailText: FLEX_SEARCH_EXCERPT_TAIL_TEXT, - matcher: matcher, }), }; }); diff --git a/src/components/excerpt.js b/src/components/excerpt.js index 0107a1f..a1c9e2b 100644 --- a/src/components/excerpt.js +++ b/src/components/excerpt.js @@ -1,12 +1,13 @@ const ExcerptHtml = require('./excerpt-html'); const TextProcessing = require('./text-processing'); +const matcher = require('./matcher'); + /** * @typedef {Object} ExcerptOption * @property {number} [aroundLength] * @property {string} [headText] * @property {string} [tailText] - * @property {Object.} [matcher] */ /** @@ -20,11 +21,10 @@ const create = (content, query, option) => { aroundLength = 50, headText = '... ', tailText = ' ...', - matcher = {}, } = option || {}; const contentLowerCase = content.toLowerCase(); - const queryLowerCase = addMatcher(query, matcher).toLowerCase(); + const queryLowerCase = addMatcher(query).toLowerCase(); const textProcessing = new TextProcessing(); @@ -50,10 +50,9 @@ const create = (content, query, option) => { /** * @param {string} query - * @param {Object.} matcher * @return {string} */ -const addMatcher = (query, matcher) => { +const addMatcher = (query) => { let converted1 = query; let converted2 = query; From 2c79566a32e8263bc53b20fcdff73548fa3e8055 Mon Sep 17 00:00:00 2001 From: smori Date: Tue, 19 Sep 2023 00:42:45 +0900 Subject: [PATCH 3/3] [test] excerpt.js --- test/components/excerpt.js | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/components/excerpt.js b/test/components/excerpt.js index 9505dce..7a967ae 100644 --- a/test/components/excerpt.js +++ b/test/components/excerpt.js @@ -84,6 +84,25 @@ describe('components', () => { 'weigh ...', ], + // matcher + [ + '2023', + {}, + '2023年', + '2023年', + ], + [ + '2023', + {}, + '2023年', + '2023年', + ], + [ + 'いぬ ねこ', + {}, + 'イヌとネコ', + 'イヌネコ', + ], ]; dataSet.forEach(([query, option, content, highlighted]) => {