From 756947562e7b5463dadd3ddc1d57a0ecd41c9c2f Mon Sep 17 00:00:00 2001 From: Juozas Kontvainis Date: Thu, 10 Oct 2024 12:37:04 +0300 Subject: [PATCH] feat: support finding data with typos --- src/__tests__/index.ts | 7 +++++++ src/index.ts | 29 ++++++++++++++++++----------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/__tests__/index.ts b/src/__tests__/index.ts index bcb380e..50225b3 100644 --- a/src/__tests__/index.ts +++ b/src/__tests__/index.ts @@ -54,6 +54,13 @@ const tests: Record = { 'The Tail of Forty Cities', // match2 ], }, + 'matches data that has minor typos': { + input: [ + ['juptyer', 'juppyter', 'jopytar', 'jupytor', 'jepytur'], + 'jupyter', + ], + output: ['juppyter', 'juptyer', 'jupytor'], + }, 'no match for single character inputs that are not equal': { input: [['abc'], 'd'], output: [], diff --git a/src/index.ts b/src/index.ts index 7972c0e..f5bdd73 100644 --- a/src/index.ts +++ b/src/index.ts @@ -264,7 +264,6 @@ function getClosenessRanking( stringToRank: string, ): Ranking { let matchingInOrderCharCount = 0 - let charNumber = 0 function findMatchingCharacter( matchChar: string, string: string, @@ -279,23 +278,31 @@ function getClosenessRanking( } return -1 } + let skipped = 0 function getRanking(spread: number) { const spreadPercentage = 1 / spread const inOrderPercentage = matchingInOrderCharCount / stringToRank.length - const ranking = rankings.MATCHES + inOrderPercentage * spreadPercentage + const matchPercentage = (stringToRank.length - skipped) / stringToRank.length + const ranking = rankings.MATCHES + inOrderPercentage * spreadPercentage * matchPercentage return ranking as Ranking } - const firstIndex = findMatchingCharacter(stringToRank[0], testString, 0) - if (firstIndex < 0) { - return rankings.NO_MATCH - } - charNumber = firstIndex - for (let i = 1, I = stringToRank.length; i < I; i++) { + let firstIndex = 0 + let charNumber = 0 + let nextCharNumber = 0 + for (let i = 0, I = stringToRank.length; i < I; i++) { 
const matchChar = stringToRank[i] - charNumber = findMatchingCharacter(matchChar, testString, charNumber) - const found = charNumber > -1 - if (!found) { + nextCharNumber = findMatchingCharacter(matchChar, testString, charNumber) + const found = nextCharNumber > -1 + if (found) { + charNumber = nextCharNumber + if (i === 0) { + firstIndex = charNumber + } + } else if (skipped > 0 || stringToRank.length <= 3) { + // tolerate at most one missing character, and none when the search term is short (3 characters or fewer) return rankings.NO_MATCH + } else { + skipped += 1 + } }