Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

next: Command #670

Merged
merged 20 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 174 additions & 0 deletions packages/bits-ui/src/lib/bits/command/command-score.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// eslint-disable-next-line ts/ban-ts-comment
// @ts-nocheck
// The scores are arranged so that a continuous match of characters will
// result in a total score of 1.
//
// The best case, this character is a match, and either this is the start
// of the string, or the previous character was also a match.
const SCORE_CONTINUE_MATCH = 1;
// A new match at the start of a word scores better than a new match
// elsewhere as it's more likely that the user will type the starts
// of fragments.
// NOTE: We score word jumps between spaces slightly higher than slashes, brackets
// hyphens, etc.
const SCORE_SPACE_WORD_JUMP = 0.9;
const SCORE_NON_SPACE_WORD_JUMP = 0.8;
// Any other match isn't ideal, but we include it for completeness.
const SCORE_CHARACTER_JUMP = 0.17;
// If the user transposed two letters, it should be significantly penalized.
//
// i.e. "ouch" is more likely than "curtain" when "uc" is typed.
const SCORE_TRANSPOSITION = 0.1;
// The goodness of a match should decay slightly with each missing
// character.
//
// i.e. "bad" is more likely than "bard" when "bd" is typed.
//
// This will not change the order of suggestions based on SCORE_* until
// 100 characters are inserted between matches.
const PENALTY_SKIPPED = 0.999;
// The goodness of an exact-case match should be higher than a
// case-insensitive match by a small amount.
//
// i.e. "HTML" is more likely than "haml" when "HM" is typed.
//
// This will not change the order of suggestions based on SCORE_* until
// 1000 characters are inserted between matches.
const PENALTY_CASE_MISMATCH = 0.9999;
// Match higher for letters closer to the beginning of the word
const PENALTY_DISTANCE_FROM_START = 0.9;
// If the word has more characters than the user typed, it should
// be penalised slightly.
//
// i.e. "html" is more likely than "html5" if I type "html".
//
// However, it may well be the case that there's a sensible secondary
// ordering (like alphabetical) that it makes sense to rely on when
// there are many prefix matches, so we don't make the penalty increase
// with the number of tokens.
const PENALTY_NOT_COMPLETE = 0.99;

const IS_GAP_REGEXP = /[\\/_+.#"@[({&]/;
const COUNT_GAPS_REGEXP = /[\\/_+.#"@[({&]/g;
const IS_SPACE_REGEXP = /[\s-]/;
const COUNT_SPACE_REGEXP = /[\s-]/g;

function commandScoreInner(
string,
abbreviation,
lowerString,
lowerAbbreviation,
stringIndex,
abbreviationIndex,
memoizedResults
) {
if (abbreviationIndex === abbreviation.length) {
if (stringIndex === string.length) {
return SCORE_CONTINUE_MATCH;
}
return PENALTY_NOT_COMPLETE;
}

const memoizeKey = `${stringIndex},${abbreviationIndex}`;
if (memoizedResults[memoizeKey] !== undefined) {
return memoizedResults[memoizeKey];
}

const abbreviationChar = lowerAbbreviation.charAt(abbreviationIndex);
let index = lowerString.indexOf(abbreviationChar, stringIndex);
let highScore = 0;

let score, transposedScore, wordBreaks, spaceBreaks;

while (index >= 0) {
score = commandScoreInner(
string,
abbreviation,
lowerString,
lowerAbbreviation,
index + 1,
abbreviationIndex + 1,
memoizedResults
);
if (score > highScore) {
if (index === stringIndex) {
score *= SCORE_CONTINUE_MATCH;
} else if (IS_GAP_REGEXP.test(string.charAt(index - 1))) {
score *= SCORE_NON_SPACE_WORD_JUMP;
wordBreaks = string.slice(stringIndex, index - 1).match(COUNT_GAPS_REGEXP);
if (wordBreaks && stringIndex > 0) {
score *= PENALTY_SKIPPED ** wordBreaks.length;
}
} else if (IS_SPACE_REGEXP.test(string.charAt(index - 1))) {
score *= SCORE_SPACE_WORD_JUMP;
spaceBreaks = string.slice(stringIndex, index - 1).match(COUNT_SPACE_REGEXP);
if (spaceBreaks && stringIndex > 0) {
score *= PENALTY_SKIPPED ** spaceBreaks.length;
}
} else {
score *= SCORE_CHARACTER_JUMP;
if (stringIndex > 0) {
score *= PENALTY_SKIPPED ** (index - stringIndex);
}
}

if (string.charAt(index) !== abbreviation.charAt(abbreviationIndex)) {
score *= PENALTY_CASE_MISMATCH;
}
}

if (
(score < SCORE_TRANSPOSITION &&
lowerString.charAt(index - 1) ===
lowerAbbreviation.charAt(abbreviationIndex + 1)) ||
(lowerAbbreviation.charAt(abbreviationIndex + 1) ===
lowerAbbreviation.charAt(abbreviationIndex) && // allow duplicate letters. Ref #7428
lowerString.charAt(index - 1) !== lowerAbbreviation.charAt(abbreviationIndex))
) {
transposedScore = commandScoreInner(
string,
abbreviation,
lowerString,
lowerAbbreviation,
index + 1,
abbreviationIndex + 2,
memoizedResults
);

if (transposedScore * SCORE_TRANSPOSITION > score) {
score = transposedScore * SCORE_TRANSPOSITION;
}
}

if (score > highScore) {
highScore = score;
}

index = lowerString.indexOf(abbreviationChar, index + 1);
}

memoizedResults[memoizeKey] = highScore;
return highScore;
}

function formatInput(string) {
// convert all valid space characters to space so they match each other
return string.toLowerCase().replace(COUNT_SPACE_REGEXP, " ");
}

export function commandScore(string: string, abbreviation: string, aliases?: string[]): number {
/* NOTE:
* in the original, we used to do the lower-casing on each recursive call, but this meant that toLowerCase()
* was the dominating cost in the algorithm, passing both is a little ugly, but considerably faster.
*/
string = aliases && aliases.length > 0 ? `${`${string} ${aliases?.join(" ")}`}` : string;
return commandScoreInner(
string,
abbreviation,
formatInput(string),
formatInput(abbreviation),
0,
0,
{}
);
}
Loading