diff --git a/README.md b/README.md index bd3af4d..50b3b54 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ yarn add string-natural-compare + `strB` (_string_) + `options` (_object_) - Optional options object with the following options: + `caseInsensitive` (_boolean_) - Set to `true` to compare strings case-insensitively. Default: `false`. + + `alphabet` (_string_) - A string of characters that define a custom character ordering. Default: `undefined`. ```js const naturalCompare = require('string-natural-compare'); @@ -83,9 +84,9 @@ hotelRooms.sort((a, b) => ( // When text transformation is needed or when doing a case-insensitive sort on a -// large array, it is best for performance to pre-compute the transformed text -// and store it in that object. This way, the text transformation will not be -// needed for every comparison while sorting. +// large array of objects, it is best for performance to pre-compute the +// transformed text and store it on the object. This way, the text will not need +// to be transformed for every comparison while sorting. const cars = [ {make: 'Audi', model: 'R8'}, {make: 'Porsche', model: '911 Turbo S'} @@ -95,23 +96,13 @@ for (const car of cars) { car.sortKey = (car.make + ' ' + car.model).toLowerCase(); } cars.sort((a, b) => naturalCompare(a.sortKey, b.sortKey)); -``` - -### Custom Alphabet - -It is possible to configure a custom alphabet to achieve a desired character ordering. - -```js -const naturalCompare = require('string-natural-compare'); -// Estonian alphabet -naturalCompare.alphabet = 'ABDEFGHIJKLMNOPRSŠZŽTUVÕÄÖÜXYabdefghijklmnoprsšzžtuvõäöüxy'; -['t', 'z', 'x', 'õ'].sort(naturalCompare); -// -> ['z', 't', 'õ', 'x'] -// Russian alphabet -naturalCompare.alphabet = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя'; -['Ё', 'А', 'б', 'Б'].sort(naturalCompare); +// Using a custom alphabet (Russian alphabet) +const russianOpts = { + alphabet: 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя', +}; +['Ё', 'А', 'б', 'Б'].sort((a, b) => naturalCompare(a, b, russianOpts)); // -> ['А', 'Б', 'Ё', 'б'] ``` diff --git a/benchmark/run.js b/benchmark/run.js index d2b8793..dbc3025 100644 --- a/benchmark/run.js +++ b/benchmark/run.js @@ -104,32 +104,32 @@ if (suite.length) { .run(); } -naturalCompareMaster.alphabet = 'ABDEFGHIJKLMNOPRSŠZŽTUVÕÄÖÜXYabdefghijklmnoprsšzžtuvõäöüxy'; -naturalCompareLocal.alphabet = 'ABDEFGHIJKLMNOPRSŠZŽTUVÕÄÖÜXYabdefghijklmnoprsšzžtuvõäöüxy'; - const alphabetSuite = new Benchmark.Suite(); +const opts = { + alphabet: 'ABDEFGHIJKLMNOPRSŠZŽTUVÕÄÖÜXYabdefghijklmnoprsšzžtuvõäöüxy', +}; if (config.has('6')) { alphabetSuite .add('6) custom alphabet included characters master', () => { - naturalCompareMaster('š.txt', 'z.txt'); - naturalCompareMaster('z.txt', 'š.txt'); + naturalCompareMaster('š.txt', 'z.txt', opts); + naturalCompareMaster('z.txt', 'š.txt', opts); }) .add('6) custom alphabet included characters local', () => { - naturalCompareLocal('š.txt', 'z.txt'); - naturalCompareLocal('z.txt', 'š.txt'); + naturalCompareLocal('š.txt', 'z.txt', opts); + naturalCompareLocal('z.txt', 'š.txt', opts); }); } if (config.has('7')) { alphabetSuite .add('7) custom alphabet missing characters master', () => { - naturalCompareMaster('é.txt', 'à.txt'); - naturalCompareMaster('à.txt', 'é.txt'); + naturalCompareMaster('é.txt', 'à.txt', opts); + naturalCompareMaster('à.txt', 'é.txt', opts); }) .add('7) custom alphabet missing characters local', () => { - naturalCompareLocal('é.txt', 'à.txt'); - naturalCompareLocal('à.txt', 'é.txt'); + naturalCompareLocal('é.txt', 'à.txt', opts); + naturalCompareLocal('à.txt', 'é.txt', opts); }); } diff --git a/natural-compare.js b/natural-compare.js index 860b191..96688a8 100644 --- a/natural-compare.js +++ b/natural-compare.js @@ -1,7 +1,6 @@ 'use strict'; -var alphabet; -var alphabetIndexMap = []; +const defaultAlphabetIndexMap = []; function isNumberCode(code) { return code >= 48 && code <= 57; @@ -15,16 +14,23 @@ function naturalCompare(a, b, opts) { throw new TypeError(`The second argument must be a string. Received type '${typeof b}'`); } - if (opts && opts.caseInsensitive) { - a = a.toLowerCase(); - b = b.toLowerCase(); - } - + var alphabetIndexMap = defaultAlphabetIndexMap; var lengthA = a.length; var lengthB = b.length; var aIndex = 0; var bIndex = 0; + if (opts) { + if (opts.caseInsensitive) { + a = a.toLowerCase(); + b = b.toLowerCase(); + } + + if (opts.alphabet) { + alphabetIndexMap = buildAlphabetIndexMap(opts.alphabet); + } + } + while (aIndex < lengthA && bIndex < lengthB) { var charCodeA = a.charCodeAt(aIndex); var charCodeB = b.charCodeAt(bIndex); @@ -99,33 +105,30 @@ function naturalCompare(a, b, opts) { return lengthA - lengthB; } -Object.defineProperties(naturalCompare, { - alphabet: { - get() { - return alphabet; - }, +const alphabetIndexMapCache = {}; - set(value) { - alphabet = value; - alphabetIndexMap = []; +function buildAlphabetIndexMap(alphabet) { + const existingMap = alphabetIndexMapCache[alphabet]; + if (existingMap !== undefined) { + return existingMap; + } - if (!alphabet) { - return; - } + const indexMap = []; + const maxCharCode = alphabet.split('').reduce((maxCode, char) => { + return Math.max(maxCode, char.charCodeAt(0)); + }, 0); - const maxCharCode = alphabet.split('').reduce((maxCode, char) => { - return Math.max(maxCode, char.charCodeAt(0)); - }, 0); + for (let i = 0; i <= maxCharCode; i++) { + indexMap.push(-1); + } - for (let i = 0; i <= maxCharCode; i++) { - alphabetIndexMap.push(-1); - } + for (let i = 0; i < alphabet.length; i++) { + indexMap[alphabet.charCodeAt(i)] = i; + } - for (let i = 0; i < alphabet.length; i++) { - alphabetIndexMap[alphabet.charCodeAt(i)] = i; - } - }, - }, -}); + alphabetIndexMapCache[alphabet] = indexMap; + + return indexMap; +} module.exports = naturalCompare; diff --git a/test/test.js b/test/test.js index 0f2fc8f..68b801a 100644 --- a/test/test.js +++ b/test/test.js @@ -245,10 +245,12 @@ describe('naturalCompare()', () => { }); it('should compare strings using the provided alphabet', () => { - naturalCompare.alphabet = 'ABDEFGHIJKLMNOPRSŠZŽTUVÕÄÖÜXYabdefghijklmnoprsšzžtuvõäöüxy'; + const opts = { + alphabet: 'ABDEFGHIJKLMNOPRSŠZŽTUVÕÄÖÜXYabdefghijklmnoprsšzžtuvõäöüxy', + }; ['Д', 'a', 'ä', 'B', 'Š', 'X', 'A', 'õ', 'u', 'z', '1', '2', '9', '10'] - .sort(naturalCompare) + .sort((a, b) => naturalCompare(a, b, opts)) .should.deepEqual(['1', '2', '9', '10', 'A', 'B', 'Š', 'X', 'a', 'z', 'u', 'õ', 'ä', 'Д']); naturalCompare.alphabet = null; // Reset alphabet for other tests @@ -267,27 +269,16 @@ describe('naturalCompare()', () => { }); it('should compare strings using the provided alphabet', () => { - naturalCompare.alphabet = 'ABDEFGHIJKLMNOPRSŠZŽTUVÕÄÖÜXYabdefghijklmnoprsšzžtuvõäöüxy'; + const opts = { + alphabet: 'ABDEFGHIJKLMNOPRSŠZŽTUVÕÄÖÜXYabdefghijklmnoprsšzžtuvõäöüxy', + caseInsensitive: true, + }; ['Д', 'a', 'ä', 'B', 'Š', 'X', 'Ü', 'õ', 'u', 'z', '1', '2', '9', '10'] - .sort((a, b) => naturalCompare(a, b, {caseInsensitive: true})) + .sort((a, b) => naturalCompare(a, b, opts)) .should.deepEqual(['1', '2', '9', '10', 'a', 'B', 'Š', 'z', 'u', 'õ', 'ä', 'Ü', 'X', 'Д']); }); }); }); - - -describe('naturalCompare.alphabet', () => { - - it('can be set and retrieved', () => { - naturalCompare.alphabet = 'cba'; - naturalCompare.alphabet.should.equal('cba'); - }); - - it('can be set to null', () => { - naturalCompare.alphabet = null; - }); - -});