Skip to content

Commit

Permalink
Merge pull request #30 from smori1983/matcher-for-japanese-characters
Browse files Browse the repository at this point in the history
Matcher for Japanese characters
  • Loading branch information
smori1983 authored Sep 18, 2023
2 parents bb2dde0 + 2c79566 commit 4d583b8
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 3 deletions.
2 changes: 2 additions & 0 deletions src/components/SearchBoxFlexSearch.vue
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import { Document } from 'flexsearch'
import data from '@dynamic/vuepress-plugin-flexsearch/data'
import excerpt from './excerpt'
import matcher from './matcher';
import ngram from '../tokenizer/ngram'
/* global FLEX_SEARCH_HOTKEYS */
Expand Down Expand Up @@ -159,6 +160,7 @@ export default {
setUpFlexSearchDocument () {
for (const locale in data) {
const doc = new Document({
matcher: matcher,
tokenize: 'forward',
id: 'key',
index: [
Expand Down
24 changes: 21 additions & 3 deletions src/components/excerpt.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
const ExcerptHtml = require('./excerpt-html');
const TextProcessing = require('./text-processing');

const matcher = require('./matcher');

/**
* @typedef {Object} ExcerptOption
* @property {number} [aroundLength]
* @property {string} [head]
* @property {string} [tail]
* @property {string} [headText]
* @property {string} [tailText]
*/

/**
Expand All @@ -22,7 +24,7 @@ const create = (content, query, option) => {
} = option || {};

const contentLowerCase = content.toLowerCase();
const queryLowerCase = query.toLowerCase();
const queryLowerCase = addMatcher(query).toLowerCase();

const textProcessing = new TextProcessing();

Expand All @@ -46,6 +48,22 @@ const create = (content, query, option) => {
return content.slice(0, aroundLength) + (content.length > aroundLength ? tailText : '');
};

/**
 * Expands a query with its matcher-converted variants.
 *
 * Three strings are joined with a single space, in this order:
 * the query as given, the query with every matcher key replaced by its value,
 * and the query with every matcher value replaced by its key.
 *
 * @param {string} query
 * @return {string}
 */
const addMatcher = (query) => {
  const pairs = Object.entries(matcher);

  // key -> value direction
  const keyToValue = pairs.reduce(
    (text, [from, to]) => text.replaceAll(from, to),
    query,
  );

  // value -> key direction
  const valueToKey = pairs.reduce(
    (text, [from, to]) => text.replaceAll(to, from),
    query,
  );

  return [query, keyToValue, valueToKey].join(' ');
};

/**
* @param {string} excerpt
* @param {string[]} queries
Expand Down
95 changes: 95 additions & 0 deletions src/components/matcher.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
module.exports = {
'1': '1',
'2': '2',
'3': '3',
'4': '4',
'5': '5',
'6': '6',
'7': '7',
'8': '8',
'9': '9',
'0': '0',

'あ': 'ア',
'い': 'イ',
'う': 'ウ',
'え': 'エ',
'お': 'オ',
'か': 'カ',
'き': 'キ',
'く': 'ク',
'け': 'ケ',
'こ': 'コ',
'さ': 'サ',
'し': 'シ',
'す': 'ス',
'せ': 'セ',
'そ': 'ソ',
'た': 'タ',
'ち': 'チ',
'つ': 'ツ',
'て': 'テ',
'と': 'ト',
'な': 'ナ',
'に': 'ニ',
'ぬ': 'ヌ',
'ね': 'ネ',
'の': 'ノ',
'は': 'ハ',
'ひ': 'ヒ',
'ふ': 'フ',
'へ': 'ヘ',
'ほ': 'ホ',
'ま': 'マ',
'み': 'ミ',
'む': 'ム',
'め': 'メ',
'も': 'モ',
'や': 'ヤ',
'ゆ': 'ユ',
'よ': 'ヨ',
'ら': 'ラ',
'り': 'リ',
'る': 'ル',
'れ': 'レ',
'ろ': 'ロ',
'わ': 'ワ',
'を': 'ヲ',
'ん': 'ン',

'が': 'ガ',
'ぎ': 'ギ',
'ぐ': 'グ',
'げ': 'ゲ',
'ご': 'ゴ',
'ざ': 'ザ',
'じ': 'ジ',
'ず': 'ズ',
'ぜ': 'ゼ',
'ぞ': 'ゾ',
'だ': 'ダ',
'ぢ': 'ヂ',
'づ': 'ヅ',
'で': 'デ',
'ど': 'ド',
'ば': 'バ',
'び': 'ビ',
'ぶ': 'ブ',
'べ': 'ベ',
'ぼ': 'ボ',

'ぱ': 'パ',
'ぴ': 'ピ',
'ぷ': 'プ',
'ぺ': 'ペ',
'ぽ': 'ポ',

'ぁ': 'ァ',
'ぃ': 'ィ',
'ぅ': 'ゥ',
'ぇ': 'ェ',
'ぉ': 'ォ',
'ゃ': 'ャ',
'ゅ': 'ュ',
'ょ': 'ョ',
};
19 changes: 19 additions & 0 deletions test/components/excerpt.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,25 @@ describe('components', () => {
'weigh ...',
],

// matcher
[
'2023',
{},
'2023年',
'<strong>2023</strong>年',
],
[
'2023',
{},
'2023年',
'<strong>2023</strong>年',
],
[
'いぬ ねこ',
{},
'イヌとネコ',
'<strong>イヌ</strong>と<strong>ネコ</strong>',
],
];

dataSet.forEach(([query, option, content, highlighted]) => {
Expand Down

0 comments on commit 4d583b8

Please sign in to comment.