diff --git a/lib/mdict-base.js b/lib/mdict-base.js index 1ce2c1c..61b89fc 100644 --- a/lib/mdict-base.js +++ b/lib/mdict-base.js @@ -180,12 +180,6 @@ var MDictBase = /*#__PURE__*/function () { this.header = _common["default"].parseHeader(headerText); // set header default configuration this.header.KeyCaseSensitive = this.header.KeyCaseSensitive || 'No'; - this.compareFn = _common["default"].isTrue(this.header.KeyCaseSensitive) ? _common["default"].normalUpperCaseWordCompare : _common["default"].wordCompare; - - if (this.ext === 'mdd') { - this.compareFn = _common["default"].localCompare; // this.compareFn = common.normalUpperCaseWordCompare; - } - this.header.StripKey = this.header.StripKey || 'Yes'; // encrypted flag // 0x00 - no encryption // 0x01 - encrypt record block @@ -514,7 +508,7 @@ var MDictBase = /*#__PURE__*/function () { }, { key: "_reduceWordKeyBlock", - value: function _reduceWordKeyBlock(phrase, _s) { + value: function _reduceWordKeyBlock(phrase, _s, compareFn) { if (!_s || _s == undefined) { // eslint-disable-next-line _s = function _s(word) { @@ -530,9 +524,9 @@ var MDictBase = /*#__PURE__*/function () { while (left <= right) { mid = left + (right - left >> 1); - if (this.compareFn(_s(phrase), _s(this.keyBlockInfoList[mid].firstKey)) >= 0 && this.compareFn(_s(phrase), _s(this.keyBlockInfoList[mid].lastKey)) <= 0) { + if (compareFn(_s(phrase), _s(this.keyBlockInfoList[mid].firstKey)) >= 0 && compareFn(_s(phrase), _s(this.keyBlockInfoList[mid].lastKey)) <= 0) { return mid; - } else if (this.compareFn(_s(phrase), _s(this.keyBlockInfoList[mid].lastKey)) >= 0) { + } else if (compareFn(_s(phrase), _s(this.keyBlockInfoList[mid].lastKey)) >= 0) { left = mid + 1; } else { right = mid - 1; diff --git a/lib/mdict.js b/lib/mdict.js index 543d0ed..9ce256b 100644 --- a/lib/mdict.js +++ b/lib/mdict.js @@ -48,64 +48,38 @@ var Mdict = /*#__PURE__*/function (_MdictBase) { _this.searchOptions = {}; searchOptions = searchOptions || {}; _this.searchOptions.passcode = searchOptions.passcode || undefined; - _this.searchOptions.keyCaseSensitive = searchOptions.keyCaseSensitive == undefined ? true : searchOptions.keyCaseSensitive; - _this.searchOptions.stripKey = searchOptions.stripKey == undefined ? true : searchOptions.stripKey; + _this.searchOptions.keyCaseSensitive = searchOptions.keyCaseSensitive; + _this.searchOptions.stripKey = searchOptions.stripKey; return _this; } (0, _createClass2["default"])(Mdict, [{ key: "_stripKey", value: function _stripKey() { - var keyCaseSensitive = this.searchOptions.keyCaseSensitive || _common["default"].isTrue(this.header.KeyCaseSensitive); - var stripKey = this.searchOptions.stripKey || _common["default"].isTrue(this.header.StripKey); var regexp = _common["default"].REGEXP_STRIPKEY[this.ext]; - - if (this.ext === 'mdd') { - return function _s(key) { - return key; - }; - } - - if (keyCaseSensitive) { - return stripKey ? function _s(key) { - return key.replace(regexp, '$1'); - } : function _s(key) { - return key; - }; - } - - return this.searchOptions.stripKey || _common["default"].isTrue(this.header.StripKey || (this._version >= 2.0 ? '' : 'yes')) ? function _s(key) { - return key.toLowerCase().replace(regexp, '$1'); + return stripKey ? function _s(key) { + return key.replace(regexp, "$1"); } : function _s(key) { - return key.toLowerCase(); + return key; }; } }, { key: "lookup", value: function lookup(word) { - var sfunc = this._stripKey(); - - var kbid = this._reduceWordKeyBlock(word, sfunc); // not found + var record = this._lookupKID(word); // if not found the key block, return undefined - if (kbid < 0) { + if (record === undefined) { return { keyText: word, definition: null }; } - var list = this._decodeKeyBlockByKBID(kbid); - - var i = this._binarySearh(list, word, sfunc); // if not found the key block, return undefined - - - if (!list[i]) return { - keyText: word, - definition: null - }; + var i = record.idx; + var list = record.list; var rid = this._reduceRecordBlock(list[i].recordStartOffset); @@ -115,25 +89,57 @@ var Mdict = /*#__PURE__*/function (_MdictBase) { return data; } + }, { + key: "_isKeyCaseSensitive", + value: function _isKeyCaseSensitive() { + return this.searchOptions.keyCaseSensitive || _common["default"].isTrue(this.header.KeyCaseSensitive); + } }, { key: "_lookupKID", value: function _lookupKID(word) { - var sfunc = this._stripKey(); + var _this2 = this; + + var lookupInternal = function lookupInternal(compareFn) { + var sfunc = _this2._stripKey(); - var kbid = this._reduceWordKeyBlock(word, sfunc); + var kbid = _this2._reduceWordKeyBlock(word, sfunc, compareFn); // not found + + + if (kbid < 0) { + return undefined; + } - var list = this._decodeKeyBlockByKBID(kbid); + var list = _this2._decodeKeyBlockByKBID(kbid); - var i = this._binarySearh(list, word, sfunc); + var i = _this2._binarySearh(list, word, sfunc, compareFn); - return { - idx: i, - list: list + if (i === undefined) { + return undefined; + } + + return { + idx: i, + list: list + }; }; + + var record; + + if (this._isKeyCaseSensitive()) { + record = lookupInternal(_common["default"].normalUpperCaseWordCompare); + } else { + record = lookupInternal(_common["default"].normalUpperCaseWordCompare); + + if (record === undefined) { + record = lookupInternal(_common["default"].wordCompare); + } + } + + return record; } }, { key: "_binarySearh", - value: function _binarySearh(list, word, _s) { + value: function _binarySearh(list, word, _s, compareFn) { if (!_s || _s == undefined) { // eslint-disable-next-line _s = this._stripKey(); @@ -143,14 +149,14 @@ var Mdict = /*#__PURE__*/function (_MdictBase) { var right = list.length; var mid = 0; - while (left < right) { + while (left <= right) { mid = left + (right - left >> 1); // if case sensitive, the uppercase word is smaller than lowercase word // for example: `Holanda` is smaller than `abacaxi` // so when comparing with the words, we should use the dictionary order, // however, if we change the word to lowercase, the binary search algorithm will be confused // so, we use the enhanced compare function `common.wordCompare` - var compareResult = this.compareFn(_s(word), _s(list[mid].keyText)); // console.log(`@#@# wordCompare ${_s(word)} ${_s(list[mid].keyText)} ${compareResult} l: ${left} r: ${right} mid: ${mid} ${list[mid].keyText}`) + var compareResult = compareFn(_s(word), _s(list[mid].keyText)); // console.log(`@#@# wordCompare ${_s(word)} ${_s(list[mid].keyText)} ${compareResult} l: ${left} r: ${right} mid: ${mid} ${list[mid].keyText}`) if (compareResult > 0) { left = mid + 1; @@ -161,7 +167,43 @@ var Mdict = /*#__PURE__*/function (_MdictBase) { } } - return left; + return undefined; + } + }, { + key: "_findList", + value: function _findList(word) { + var _this3 = this; + + var findListInternal = function findListInternal(compareFn) { + var sfunc = _this3._stripKey(); + + var kbid = _this3._reduceWordKeyBlock(word, sfunc, compareFn); // not found + + + if (kbid < 0) { + return undefined; + } + + return { + sfunc: sfunc, + kbid: kbid, + list: _this3._decodeKeyBlockByKBID(kbid) + }; + }; + + var list; + + if (this._isKeyCaseSensitive()) { + list = findListInternal(_common["default"].normalUpperCaseWordCompare); + } else { + list = findListInternal(_common["default"].normalUpperCaseWordCompare); + + if (list === undefined) { + list = findListInternal(_common["default"].wordCompare); + } + } + + return list; } /** * get word prefix words @@ -171,11 +213,7 @@ var Mdict = /*#__PURE__*/function (_MdictBase) { }, { key: "prefix", value: function prefix(phrase) { - var sfunc = this._stripKey(); - - var kbid = this._reduceWordKeyBlock(phrase, sfunc); - - var list = this._decodeKeyBlockByKBID(kbid); + var list = this._findList(phrase).list; var trie = _doublearray["default"].builder().build(list.map(function (keyword) { return { @@ -199,12 +237,11 @@ var Mdict = /*#__PURE__*/function (_MdictBase) { }, { key: "associate", value: function associate(phrase) { - var sfunc = this._stripKey(); - - var kbid = this._reduceWordKeyBlock(phrase, sfunc); - - var list = this._decodeKeyBlockByKBID(kbid); + var record = this._findList(phrase); + var sfunc = record.sfunc; + var kbid = record.kbid; + var list = record.list; var matched = list.filter(function (item) { return sfunc(item.keyText).startsWith(sfunc(phrase)); }); @@ -251,7 +288,7 @@ var Mdict = /*#__PURE__*/function (_MdictBase) { }, { key: "fuzzy_search", value: function fuzzy_search(word, fuzzy_size, ed_gap) { - var _this2 = this; + var _this4 = this; var fwords = []; var fuzzy_words = []; @@ -264,11 +301,11 @@ var Mdict = /*#__PURE__*/function (_MdictBase) { })); fuzzy_size = fuzzy_size - fwords.length < 0 ? 0 : fuzzy_size - fwords.length; fwords.map(function (fw) { - var _this2$_lookupKID = _this2._lookupKID(fw.key), - idx = _this2$_lookupKID.idx, - list = _this2$_lookupKID.list; + var _this4$_lookupKID = _this4._lookupKID(fw.key), + idx = _this4$_lookupKID.idx, + list = _this4$_lookupKID.list; - return _this2._find_nabor(idx, Math.ceil(fuzzy_size / fwords.length), list).filter(function (item) { + return _this4._find_nabor(idx, Math.ceil(fuzzy_size / fwords.length), list).filter(function (item) { return _common["default"].levenshtein_distance(item.keyText, word) <= ed_gap; }).map(function (kitem) { return fuzzy_words.push({ diff --git a/package.json b/package.json index d8a61a0..5fe4c80 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,7 @@ "main": "lib/mdict.js", "types": "typings/mdict.d.ts", "scripts": { - "debug": "npm run build && mocha --require @babel/register test/debug.test.js", + "debug": "npm run build && mocha --require @babel/register test/debug.spec.js", "test": "npm run build && mocha --require @babel/register test/*.spec.js", "coverage": "npm run build && nyc mocha --require @babel/register", "jest": "npm run build && jest test/*.spec.js", diff --git a/src/mdict-base.js b/src/mdict-base.js index e296ff1..f150fe9 100644 --- a/src/mdict-base.js +++ b/src/mdict-base.js @@ -168,13 +168,7 @@ class MDictBase { // set header default configuration this.header.KeyCaseSensitive = this.header.KeyCaseSensitive || 'No'; - this.compareFn = common.isTrue(this.header.KeyCaseSensitive) - ? common.normalUpperCaseWordCompare - : common.wordCompare; - if (this.ext === 'mdd') { - this.compareFn = common.localCompare; - // this.compareFn = common.normalUpperCaseWordCompare; - } + this.header.StripKey = this.header.StripKey || 'Yes'; // encrypted flag @@ -552,7 +546,7 @@ class MDictBase { * @param {string} phrase searching phrase * @param {function} stripfunc strip key string to compare */ - _reduceWordKeyBlock(phrase, _s) { + _reduceWordKeyBlock(phrase, _s, compareFn) { if (!_s || _s == undefined) { // eslint-disable-next-line _s = (word) => { @@ -568,13 +562,13 @@ class MDictBase { while (left <= right) { mid = left + ((right - left) >> 1); if ( - this.compareFn(_s(phrase), _s(this.keyBlockInfoList[mid].firstKey)) >= + compareFn(_s(phrase), _s(this.keyBlockInfoList[mid].firstKey)) >= 0 && - this.compareFn(_s(phrase), _s(this.keyBlockInfoList[mid].lastKey)) <= 0 + compareFn(_s(phrase), _s(this.keyBlockInfoList[mid].lastKey)) <= 0 ) { return mid; } else if ( - this.compareFn(_s(phrase), _s(this.keyBlockInfoList[mid].lastKey)) >= 0 + compareFn(_s(phrase), _s(this.keyBlockInfoList[mid].lastKey)) >= 0 ) { left = mid + 1; } else { diff --git a/src/mdict.js b/src/mdict.js index 1eaa247..465a0af 100644 --- a/src/mdict.js +++ b/src/mdict.js @@ -15,86 +15,90 @@ class Mdict extends MdictBase { this.searchOptions = {}; searchOptions = searchOptions || {}; this.searchOptions.passcode = searchOptions.passcode || undefined; - this.searchOptions.keyCaseSensitive = - searchOptions.keyCaseSensitive == undefined - ? true - : searchOptions.keyCaseSensitive; - this.searchOptions.stripKey = - searchOptions.stripKey == undefined ? true : searchOptions.stripKey; + this.searchOptions.keyCaseSensitive = searchOptions.keyCaseSensitive; + this.searchOptions.stripKey = searchOptions.stripKey; } _stripKey() { - const keyCaseSensitive = - this.searchOptions.keyCaseSensitive || - common.isTrue(this.header.KeyCaseSensitive); const stripKey = this.searchOptions.stripKey || common.isTrue(this.header.StripKey); const regexp = common.REGEXP_STRIPKEY[this.ext]; - if (this.ext === 'mdd') { - return function _s(key) { - return key; - }; - } - - if (keyCaseSensitive) { - return stripKey - ? function _s(key) { - return key.replace(regexp, '$1'); - } - : function _s(key) { - return key; - }; - } - - return this.searchOptions.stripKey || - common.isTrue(this.header.StripKey || (this._version >= 2.0 ? '' : 'yes')) + return stripKey ? function _s(key) { - return key.toLowerCase().replace(regexp, '$1'); - } + return key.replace(regexp, "$1"); + } : function _s(key) { - return key.toLowerCase(); - }; + return key; + }; } lookup(word) { - const sfunc = this._stripKey(); - const kbid = this._reduceWordKeyBlock(word, sfunc); - // not found - if (kbid < 0) { - return { keyText: word, definition: null }; - } - const list = this._decodeKeyBlockByKBID(kbid); - const i = this._binarySearh(list, word, sfunc); + const record = this._lookupKID(word); + // if not found the key block, return undefined - if (!list[i]) return { keyText: word, definition: null }; + if (record === undefined) { + return { + keyText: word, + definition: null, + }; + } + + const i = record.idx; + const list = record.list; + const rid = this._reduceRecordBlock(list[i].recordStartOffset); const nextStart = i + 1 >= list.length ? this._recordBlockStartOffset + - this.recordBlockInfoList[this.recordBlockInfoList.length - 1] - .decompAccumulator + - this.recordBlockInfoList[this.recordBlockInfoList.length - 1] - .decompSize + this.recordBlockInfoList[this.recordBlockInfoList.length - 1] + .decompAccumulator + + this.recordBlockInfoList[this.recordBlockInfoList.length - 1] + .decompSize : list[i + 1].recordStartOffset; const data = this._decodeRecordBlockByRBID( rid, list[i].keyText, list[i].recordStartOffset, - nextStart + nextStart, ); return data; } + _isKeyCaseSensitive() { + return this.searchOptions.keyCaseSensitive || + common.isTrue(this.header.KeyCaseSensitive); + } + _lookupKID(word) { - const sfunc = this._stripKey(); - const kbid = this._reduceWordKeyBlock(word, sfunc); - const list = this._decodeKeyBlockByKBID(kbid); - const i = this._binarySearh(list, word, sfunc); - return { idx: i, list }; + const lookupInternal = (compareFn) => { + const sfunc = this._stripKey(); + const kbid = this._reduceWordKeyBlock(word, sfunc, compareFn); + // not found + if (kbid < 0) { + return undefined; + } + const list = this._decodeKeyBlockByKBID(kbid); + const i = this._binarySearh(list, word, sfunc, compareFn); + if (i === undefined) { + return undefined; + } + return { idx: i, list }; + }; + + let record; + if (this._isKeyCaseSensitive()) { + record = lookupInternal(common.normalUpperCaseWordCompare); + } else { + record = lookupInternal(common.normalUpperCaseWordCompare); + if (record === undefined) { + record = lookupInternal(common.wordCompare); + } + } + return record; } - _binarySearh(list, word, _s) { + _binarySearh(list, word, _s, compareFn) { if (!_s || _s == undefined) { // eslint-disable-next-line _s = this._stripKey(); @@ -102,14 +106,14 @@ class Mdict extends MdictBase { let left = 0; let right = list.length; let mid = 0; - while (left < right) { + while (left <= right) { mid = left + ((right - left) >> 1); // if case sensitive, the uppercase word is smaller than lowercase word // for example: `Holanda` is smaller than `abacaxi` // so when comparing with the words, we should use the dictionary order, // however, if we change the word to lowercase, the binary search algorithm will be confused // so, we use the enhanced compare function `common.wordCompare` - const compareResult = this.compareFn(_s(word), _s(list[mid].keyText)); + const compareResult = compareFn(_s(word), _s(list[mid].keyText)); // console.log(`@#@# wordCompare ${_s(word)} ${_s(list[mid].keyText)} ${compareResult} l: ${left} r: ${right} mid: ${mid} ${list[mid].keyText}`) if (compareResult > 0) { left = mid + 1; @@ -119,7 +123,30 @@ class Mdict extends MdictBase { right = mid - 1; } } - return left; + return undefined; + } + + _findList(word) { + const findListInternal = (compareFn) => { + const sfunc = this._stripKey(); + const kbid = this._reduceWordKeyBlock(word, sfunc, compareFn); + // not found + if (kbid < 0) { + return undefined; + } + return {sfunc, kbid, list:this._decodeKeyBlockByKBID(kbid)}; + }; + + let list; + if (this._isKeyCaseSensitive()) { + list = findListInternal(common.normalUpperCaseWordCompare); + } else { + list = findListInternal(common.normalUpperCaseWordCompare); + if (list === undefined) { + list = findListInternal(common.wordCompare); + } + } + return list; } /** @@ -127,9 +154,7 @@ class Mdict extends MdictBase { * @param {string} phrase the word which needs to find prefix */ prefix(phrase) { - const sfunc = this._stripKey(); - const kbid = this._reduceWordKeyBlock(phrase, sfunc); - const list = this._decodeKeyBlockByKBID(kbid); + const list = this._findList(phrase).list; const trie = dart.builder().build( list.map((keyword) => ({ k: keyword.keyText, @@ -146,9 +171,10 @@ class Mdict extends MdictBase { * @param {string} phrase the word which needs to be associated */ associate(phrase) { - const sfunc = this._stripKey(); - let kbid = this._reduceWordKeyBlock(phrase, sfunc); - let list = this._decodeKeyBlockByKBID(kbid); + const record = this._findList(phrase); + const sfunc = record.sfunc; + let kbid = record.kbid; + let list = record.list; const matched = list.filter((item) => sfunc(item.keyText).startsWith(sfunc(phrase)) );