Skip to content

Commit

Permalink
feat: add conditional fuzzy search
Browse files Browse the repository at this point in the history
Fuzzy search is enabled by prefixing the query with `?`; URLs are removed from the searchable content, and exponential scoring is added for word-start matches.
  • Loading branch information
sshivaditya committed Nov 1, 2024
1 parent 1408bf2 commit 25e06c2
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 18 deletions.
24 changes: 19 additions & 5 deletions src/home/issues-search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,13 @@ export class IssueSearch {
}

public search(searchText: string, issueIds: number[]): Map<number, SearchResult> {
const filterText = searchText.toLowerCase().trim();
let filterText = searchText.toLowerCase().trim();
const results = new Map<number, SearchResult>();
const isFuzzySearchEnabled = filterText.startsWith('?');

if (isFuzzySearchEnabled) {
filterText = filterText.slice(1).trim();
}

if (!filterText) {
issueIds.forEach((id) => results.set(id, this._createEmptyResult()));
Expand All @@ -50,15 +55,15 @@ export class IssueSearch {
return;
}

const result = this._calculateIssueRelevance(issue, searchTerms);
const result = this._calculateIssueRelevance(issue, searchTerms, isFuzzySearchEnabled);
results.set(issueId, result);
});

this._calculateNDCGScore(results);
return results;
}

private _calculateIssueRelevance(issue: GitHubIssue, searchTerms: string[]): SearchResult {
private _calculateIssueRelevance(issue: GitHubIssue, searchTerms: string[], enableFuzzy: boolean): SearchResult {
const matchDetails = {
titleMatches: [] as string[],
bodyMatches: [] as string[],
Expand All @@ -77,7 +82,7 @@ export class IssueSearch {
const scores = {
title: this._searchScorer.calculateTitleScore(issue, searchTerms, matchDetails),
body: this._searchScorer.calculateBodyScore(issue, searchTerms, matchDetails),
fuzzy: this._searchScorer.calculateFuzzyScore(searchableContent, searchTerms, matchDetails),
fuzzy: enableFuzzy ? this._searchScorer.calculateFuzzyScore(searchableContent, searchTerms, matchDetails) : 0,
meta: this._searchScorer.calculateMetaScore(issue, searchTerms, matchDetails),
};

Expand Down Expand Up @@ -124,7 +129,16 @@ export class IssueSearch {
}

/**
 * Builds the single lowercase string that content-wide matching runs against.
 *
 * The result is the issue title, the issue body with `http://` / `https://`
 * URLs removed, and the names of the issue's labels, joined by single spaces.
 * Label entries that are not objects, or that have no `name`, contribute an
 * empty string.
 *
 * @param issue - The issue whose text should be flattened.
 * @returns The lowercased, URL-free searchable text.
 */
private _getSearchableContent(issue: GitHubIssue): string {
  // Strip links from the body so that URL fragments are not treated as
  // searchable words.
  const removeUrls = (text: string): string => text.replace(/https?:\/\/[^\s]+/g, '');

  const title = issue.title;
  const body = removeUrls(issue.body ?? "");
  const labels = issue.labels?.map((l) => (typeof l === "object" && l.name ? l.name : "")).join(" ") ?? "";

  return `${title} ${body} ${labels}`.toLowerCase();
}

private _createEmptyResult(visible: boolean = true): SearchResult {
Expand Down
71 changes: 58 additions & 13 deletions src/home/search/search-scorer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,21 @@ export class SearchScorer {
): number {
let score = 0;
const title = issue.title.toLowerCase();
const words = title.split(/\s+/);

searchTerms.forEach(term => {
if (title.includes(term)) {
matchDetails.titleMatches.push(term);
score += this._config.exactMatchBonus;
if (title.startsWith(term)) {
score += 0.5;
}

// Apply exponential boost for word beginnings
words.forEach(word => {
if (word.startsWith(term)) {
// e^(-x) where x is the position of the match relative to word length
const positionBoost = Math.exp(-term.length / word.length);
score += positionBoost;
}
});
}
});

Expand All @@ -36,13 +43,23 @@ export class SearchScorer {
): number {
let score = 0;
const body = (issue.body || '').toLowerCase();
const words = body.split(/\s+/);

searchTerms.forEach(term => {
const matches = body.match(new RegExp(term, 'gi')) || [];
if (matches.length > 0) {
let termScore = 0;
words.forEach(word => {
if (word.startsWith(term)) {
// Apply exponential boost for word beginnings
const positionBoost = Math.exp(-term.length / word.length);
termScore += positionBoost;
}
});

if (termScore > 0) {
matchDetails.bodyMatches.push(term);
score += Math.min(matches.length / 2, 1);
score += Math.min(termScore, 1);
}

const codeBlockMatches = body.match(/```[\s\S]*?```/g) || [];
codeBlockMatches.forEach(block => {
if (block.toLowerCase().includes(term)) {
Expand All @@ -67,9 +84,17 @@ export class SearchScorer {
if (issue.labels) {
searchTerms.forEach(term => {
issue.labels?.forEach(label => {
if (typeof label === 'object' && label.name && label.name.toLowerCase().includes(term)) {
matchDetails.labelMatches.push(label.name);
score += 0.5;
if (typeof label === 'object' && label.name) {
const labelName = label.name.toLowerCase();
if (labelName.includes(term)) {
matchDetails.labelMatches.push(label.name);
// Apply exponential boost for label matches at word start
if (labelName.startsWith(term)) {
score += 0.8;
} else {
score += 0.5;
}
}
}
});
});
Expand All @@ -85,24 +110,44 @@ export class SearchScorer {
): number {
let score = 0;
const contentWords = this._tokenizeContent(content);

searchTerms.forEach(searchTerm => {
let bestMatch = {
word: '',
score: 0
score: 0,
isWordStart: false
};

contentWords.forEach(word => {
const similarity = StringSimilarity.calculate(searchTerm, word);
if (similarity > this._config.fuzzySearchThreshold && similarity > bestMatch.score) {
bestMatch = { word, score: similarity };
const isWordStart = word.startsWith(searchTerm);

// Calculate position-based boost
const positionBoost = isWordStart ? Math.exp(-searchTerm.length / word.length) : 0;
const adjustedScore = similarity + positionBoost;

if (adjustedScore > this._config.fuzzySearchThreshold && adjustedScore > bestMatch.score) {
bestMatch = {
word,
score: adjustedScore,
isWordStart
};
}
});

if (bestMatch.score > 0) {
matchDetails.fuzzyMatches.push({
original: searchTerm,
matched: bestMatch.word,
score: bestMatch.score
});
score += bestMatch.score * this._config.fuzzyMatchWeight;

// Apply exponential weight for word-start matches
const finalScore = bestMatch.isWordStart
? bestMatch.score * Math.exp(this._config.fuzzyMatchWeight)
: bestMatch.score * this._config.fuzzyMatchWeight;

score += finalScore;
}
});

Expand Down

0 comments on commit 25e06c2

Please sign in to comment.