Skip to content

Commit

Permalink
refactor: Refactor fulltext and other search code into explain/index
Browse files Browse the repository at this point in the history
  • Loading branch information
kgilpin committed Nov 23, 2024
1 parent 8e3be79 commit 4545b12
Show file tree
Hide file tree
Showing 24 changed files with 422 additions and 54 deletions.
6 changes: 3 additions & 3 deletions packages/cli/src/cmds/search/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ import { FileIndex } from '@appland/search';
import { handleWorkingDirectory } from '../../lib/handleWorkingDirectory';
import { verbose } from '../../utils';
import searchSingleAppMap, { SearchOptions as SingleSearchOptions } from './searchSingleAppMap';
import { SearchResponse as DiagramsSearchResponse } from '../../fulltext/appmap-match';
import { SearchResponse as DiagramsSearchResponse } from '../../rpc/explain/index/appmap-match';
import {
SearchResult as EventSearchResult,
SearchResponse as EventSearchResponse,
} from '../../fulltext/FindEvents';
import { openInBrowser } from '../open/openers';
import { buildAppMapIndex, search } from '../../fulltext/appmap-index';
import buildIndexInTempDir from '../../rpc/explain/build-index-in-temp-dir';
import { buildAppMapIndex, search } from '../../rpc/explain/index/appmap-index';
import buildIndexInTempDir from '../../rpc/explain/index/build-index-in-temp-dir';

export const command = 'search <query>';
export const describe =
Expand Down
1 change: 1 addition & 0 deletions packages/cli/src/cmds/search/searchSingleAppMap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export default async function searchSingleAppMap(
query: string,
options: SearchOptions = {}
): Promise<SearchResponse> {
// eslint-disable-next-line no-param-reassign
if (appmap.endsWith('.appmap.json')) appmap = appmap.slice(0, -'.appmap.json'.length);

const findEvents = new FindEvents(appmap);
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/src/fulltext/FindEvents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import assert from 'assert';

import { verbose } from '../utils';
import { collectParameters } from './collectParameters';
import { fileNameMatchesFilterPatterns } from './filter-patterns';
import { fileNameMatchesFilterPatterns } from '../rpc/explain/index/filter-patterns';

type IndexItem = {
fqid: string;
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/src/rpc/explain/EventCollector.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { isAbsolute, join } from 'path';
import { ContextV2 } from '@appland/navie';
import { SearchRpc } from '@appland/rpc';
import { SearchResponse as AppMapSearchResponse } from '../../fulltext/appmap-match';
import { SearchResponse as AppMapSearchResponse } from './index/appmap-match';
import FindEvents, {
SearchResponse as EventSearchResponse,
SearchOptions as EventsSearchOptions,
Expand Down
6 changes: 3 additions & 3 deletions packages/cli/src/rpc/explain/SearchContextCollector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ import { ContextV2, applyContext } from '@appland/navie';
import { SearchRpc } from '@appland/rpc';
import { FileIndex, FileSearchResult } from '@appland/search';

import { SearchResponse as AppMapSearchResponse } from '../../fulltext/appmap-match';
import { SearchResponse as AppMapSearchResponse } from './index/appmap-match';
import { DEFAULT_MAX_DIAGRAMS } from '../search/search';
import EventCollector from './EventCollector';
import indexFiles from './index-files';
import indexSnippets from './index-snippets';
import collectSnippets from './collect-snippets';
import buildIndexInTempDir from './build-index-in-temp-dir';
import { buildAppMapIndex, search } from '../../fulltext/appmap-index';
import buildIndexInTempDir from './index/build-index-in-temp-dir';
import { buildAppMapIndex, search } from './index/appmap-index';

export default class SearchContextCollector {
public excludePatterns: RegExp[] | undefined;
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/src/rpc/explain/collect-snippets.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { ContextV2 } from '@appland/navie';
import { parseFileChunkSnippetId, SnippetIndex, SnippetSearchResult } from '@appland/search';
import { CHARS_PER_SNIPPET } from './collectContext';
import { CHARS_PER_SNIPPET } from './collect-context';

export default function collectSnippets(
snippetIndex: SnippetIndex,
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/src/rpc/explain/explain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import collectProjectInfos from '../../cmds/navie/projectInfo';
import configuration, { AppMapDirectory } from '../configuration';
import { getLLMConfiguration } from '../llmConfiguration';
import { RpcError, RpcHandler } from '../rpc';
import collectContext from './collectContext';
import collectContext from './collect-context';
import { initializeHistory } from './navie/historyHelper';
import { ThreadAccessError } from './navie/ihistory';
import INavie, { INavieProvider } from './navie/inavie';
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/src/rpc/explain/fileFilter.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { FilterFn, isBinaryFile, isDataFile, isLargeFile } from '@appland/search';
import makeDebug from 'debug';
import { fileNameMatchesFilterPatterns } from '../../fulltext/filter-patterns';
import { fileNameMatchesFilterPatterns } from './index/filter-patterns';

const debug = makeDebug('appmap:rpc:explain:file-filter');

Expand Down
3 changes: 0 additions & 3 deletions packages/cli/src/rpc/explain/index-files.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import sqlite3 from 'better-sqlite3';
import makeDebug from 'debug';

import {
buildFileIndex,
Expand All @@ -10,8 +9,6 @@ import {
} from '@appland/search';
import fileFilter from './fileFilter';

const debug = makeDebug('appmap:rpc:explain:index-files');

export default async function indexFiles(
db: sqlite3.Database,
directories: string[],
Expand Down
31 changes: 31 additions & 0 deletions packages/cli/src/rpc/explain/index/appmap-file-index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import sqlite3 from 'better-sqlite3';

import { FileIndex } from '@appland/search';

import buildIndexInTempDir, { CloseableIndex } from './build-index-in-temp-dir';
import { buildAppMapIndex, search } from './appmap-index';
import { SearchResponse } from './appmap-match';

/**
 * Builds a `FileIndex` over the AppMaps found in the given directories.
 *
 * The index is backed by a SQLite database opened at the file location that
 * `buildIndexInTempDir` hands to the builder callback, and is populated by `buildAppMapIndex`.
 *
 * @param appmapDirectories directories passed to `buildAppMapIndex` as the AppMap sources.
 * @returns the populated index together with a `close` function. Callers are expected to invoke
 * `close` once they are done with the index.
 */
export async function buildAppMapFileIndex(
  appmapDirectories: string[]
): Promise<CloseableIndex<FileIndex>> {
  return await buildIndexInTempDir<FileIndex>('appmaps', async (indexFile) => {
    const db = new sqlite3(indexFile);
    try {
      const fileIndex = new FileIndex(db);
      await buildAppMapIndex(fileIndex, appmapDirectories);
      return fileIndex;
    } catch (err) {
      // On failure the index is never returned, so nothing downstream can release this handle.
      // Close it here before propagating the error.
      db.close();
      throw err;
    }
  });
}

/**
 * Builds a fresh AppMap index, runs a single search against it, and closes the index.
 *
 * @param appmapDirectories directories to index.
 * @param vectorTerms search terms; they are joined with ` OR ` to form the query string.
 * @param maxDiagrams forwarded to `search` unchanged (presumably the result cap; confirm against
 * `search`).
 * @returns the response produced by `search`.
 */
export async function searchAppMapFiles(
  appmapDirectories: string[],
  vectorTerms: string[],
  maxDiagrams: number
): Promise<SearchResponse> {
  const appmapIndex = await buildAppMapFileIndex(appmapDirectories);
  try {
    const query = vectorTerms.join(' OR ');
    const response = await search(appmapIndex.index, query, maxDiagrams);
    return response;
  } finally {
    // Runs whether the search succeeds or throws.
    appmapIndex.close();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { readFile } from 'fs/promises';
import { Metadata } from '@appland/models';
import { buildFileIndex, FileIndex, fileTokens } from '@appland/search';

import { findFiles, isNodeError, verbose } from '../utils';
import { findFiles, isNodeError, verbose } from '../../../utils';
import {
downscoreOutOfDateMatches,
Match,
Expand All @@ -14,12 +14,12 @@ import {
scoreMatches,
SearchResponse,
} from './appmap-match';
import loadAppMapConfig from '../lib/loadAppMapConfig';
import loadAppMapConfig from '../../../lib/loadAppMapConfig';

type ClassMapEntry = {
export type ClassMapEntry = {
name: string;
type: string;
labels: string[];
labels?: string[];
children: ClassMapEntry[];
static?: boolean;
sourceLocation?: string;
Expand Down Expand Up @@ -48,43 +48,46 @@ export async function listAppMaps(directory: string): Promise<string[]> {
return appmapFiles.map(relativeToPath);
}

/**
 * Reads and parses the JSON index file `<appmapName>/<indexName>.json`.
 *
 * @param appmapName directory that holds the index files of one AppMap.
 * @param indexName base name of the index file, without the `.json` extension.
 * @returns the parsed content, cast to `T` without validation, or `undefined` when the file
 * cannot be read or does not contain valid JSON.
 */
export async function readIndexFile<T>(
  appmapName: string,
  indexName: string
): Promise<T | undefined> {
  const indexFile = join(appmapName, `${indexName}.json`);

  let rawContent: string;
  try {
    rawContent = await readFile(indexFile, 'utf-8');
  } catch (readError) {
    // A missing file (ENOENT) is not reported; other native errors are.
    if (isNativeError(readError) && !isNodeError(readError, 'ENOENT')) {
      warn(`Error reading metadata file ${indexFile}: ${readError.message}`);
    }
    return undefined;
  }

  try {
    return JSON.parse(rawContent) as T;
  } catch (parseError) {
    const reason = isNativeError(parseError) ? parseError.message : String(parseError);
    warn(`Error parsing metadata file ${indexFile}: ${reason}`);
    return undefined;
  }
}

/**
* Read all content for an AppMap. For efficiency, utilizes the AppMap index files, rather
* than reading the entire AppMap file directly.
*/
export async function readAppMapContent(appmapFile: string): Promise<string> {
const appmapName = appmapFile.replace(/\.appmap\.json$/, '');

async function readIndexFile<T>(name: string): Promise<T | undefined> {
const indexFile = join(appmapName, [name, '.json'].join(''));
let indexStr: string;
try {
indexStr = await readFile(indexFile, 'utf-8');
} catch (e) {
if (isNativeError(e) && !isNodeError(e, 'ENOENT')) {
warn(`Error reading metadata file ${indexFile}: ${e.message}`);
}
return undefined;
}

try {
return JSON.parse(indexStr) as T;
} catch (e) {
const errorMessage = isNativeError(e) ? e.message : String(e);
warn(`Error parsing metadata file ${indexFile}: ${errorMessage}`);
}
}

const appmapWords = new Array<string>();

const metadata = await readIndexFile<Metadata>('metadata');
const metadata = await readIndexFile<Metadata>(appmapName, 'metadata');
if (metadata) {
appmapWords.push(metadata.name);
if (metadata.labels) appmapWords.push(...metadata.labels);
if (metadata.exception) appmapWords.push(metadata.exception.message);
}

const classMap = (await readIndexFile<ClassMapEntry[]>('classMap')) ?? [];
const classMap = (await readIndexFile<ClassMapEntry[]>(appmapName, 'classMap')) ?? [];

const queries = new Array<string>();
const codeObjects = new Array<string>();
Expand Down Expand Up @@ -119,7 +122,7 @@ export async function readAppMapContent(appmapFile: string): Promise<string> {
classMap.forEach((co) => collectClassMapEntry(co));
appmapWords.push(...queries, ...codeObjects, ...routes, ...externalRoutes);

const parameters = (await readIndexFile<string[]>('canonical.parameters')) ?? [];
const parameters = (await readIndexFile<string[]>(appmapName, 'canonical.parameters')) ?? [];
appmapWords.push(...parameters);
appmapWords.push(...types);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import UpToDate from '../lib/UpToDate';
import { exists } from '../utils';
import UpToDate from '../../../lib/UpToDate';
import { exists } from '../../../utils';

import makeDebug from 'debug';

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export interface Closeable {
close(): void;
}

type CloseableIndex<T> = {
export type CloseableIndex<T> = {
index: T;
close: () => void;
};
Expand Down
83 changes: 83 additions & 0 deletions packages/cli/src/rpc/explain/index/project-file-index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import sqlite3 from 'better-sqlite3';
import makeDebug from 'debug';

import {
buildFileIndex,
FileIndex,
FileSearchResult,
fileTokens,
FilterFn,
isBinaryFile,
isDataFile,
isLargeFile,
listProjectFiles,
readFileSafe,
} from '@appland/search';
import { fileNameMatchesFilterPatterns } from './filter-patterns';

import buildIndexInTempDir, { CloseableIndex } from './build-index-in-temp-dir';

// Module-level logger created under the 'appmap:index:project-files' debug namespace.
const debug = makeDebug('appmap:index:project-files');

/**
 * Creates the predicate that decides whether a project file is included in the index.
 *
 * A path is rejected when any of the following holds, checked in this order:
 *  1. `isBinaryFile` reports it as binary;
 *  2. `fileNameMatchesFilterPatterns` rejects it for the given include/exclude patterns;
 *  3. `isDataFile` reports it as a data file and `isLargeFile` reports it as large.
 *
 * @param includePatterns forwarded to `fileNameMatchesFilterPatterns`.
 * @param excludePatterns forwarded to `fileNameMatchesFilterPatterns`.
 * @returns an async filter resolving to `true` for files that should be indexed.
 */
function fileFilter(
  includePatterns: RegExp[] | undefined,
  excludePatterns: RegExp[] | undefined
): FilterFn {
  const shouldIndex = async (path: string): Promise<boolean> => {
    debug('Filtering: %s', path);

    if (isBinaryFile(path)) {
      debug('Skipping binary file: %s', path);
      return false;
    }

    if (!fileNameMatchesFilterPatterns(path, includePatterns, excludePatterns)) return false;

    // The size check is only awaited for data files.
    if (!isDataFile(path)) return true;

    if (await isLargeFile(path)) {
      debug('Skipping large data file: %s', path);
      return false;
    }

    return true;
  };

  return shouldIndex;
}

/**
 * Creates a `FileIndex` on the given database and populates it from the given directories.
 *
 * Files are listed with `listProjectFiles`, screened with `fileFilter`, read with `readFileSafe`
 * and tokenized with `fileTokens`.
 *
 * @param db open database that backs the index.
 * @param directories directories whose files are indexed.
 * @param includePatterns forwarded to `fileFilter`.
 * @param excludePatterns forwarded to `fileFilter`.
 * @returns the populated index.
 */
async function indexFiles(
  db: sqlite3.Database,
  directories: string[],
  includePatterns: RegExp[] | undefined,
  excludePatterns: RegExp[] | undefined
): Promise<FileIndex> {
  const index = new FileIndex(db);

  await buildFileIndex(
    index,
    directories,
    listProjectFiles,
    fileFilter(includePatterns, excludePatterns),
    readFileSafe,
    fileTokens
  );

  return index;
}

/**
 * Builds a `FileIndex` over the project source files in the given directories.
 *
 * The index is backed by a SQLite database opened at the file location that
 * `buildIndexInTempDir` hands to the builder callback, and is populated by `indexFiles`.
 *
 * @param sourceDirectories directories whose files are indexed.
 * @param includePatterns forwarded to the file filter.
 * @param excludePatterns forwarded to the file filter.
 * @returns the populated index together with a `close` function. Callers are expected to invoke
 * `close` once they are done with the index.
 */
export async function buildProjectFileIndex(
  sourceDirectories: string[],
  includePatterns: RegExp[] | undefined,
  excludePatterns: RegExp[] | undefined
): Promise<CloseableIndex<FileIndex>> {
  return await buildIndexInTempDir('files', async (indexFile) => {
    const db = new sqlite3(indexFile);
    try {
      return await indexFiles(db, sourceDirectories, includePatterns, excludePatterns);
    } catch (err) {
      // On failure the index is never returned, so nothing downstream can release this handle.
      // Close it here before propagating the error.
      db.close();
      throw err;
    }
  });
}

/**
 * Builds a fresh project file index, runs a single search against it, and closes the index.
 *
 * @param sourceDirectories directories whose files are indexed.
 * @param includePatterns forwarded to `buildProjectFileIndex`.
 * @param excludePatterns forwarded to `buildProjectFileIndex`.
 * @param vectorTerms search terms; they are joined with ` OR ` to form the query string.
 * @returns the results of the index search.
 */
export async function searchProjectFiles(
  sourceDirectories: string[],
  includePatterns: RegExp[] | undefined,
  excludePatterns: RegExp[] | undefined,
  vectorTerms: string[]
): Promise<FileSearchResult[]> {
  const projectIndex = await buildProjectFileIndex(
    sourceDirectories,
    includePatterns,
    excludePatterns
  );
  try {
    const query = vectorTerms.join(' OR ');
    return projectIndex.index.search(query);
  } finally {
    // Runs whether the search succeeds or throws.
    projectIndex.close();
  }
}
8 changes: 4 additions & 4 deletions packages/cli/src/rpc/search/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ import { FileIndex } from '@appland/search';
import { SearchRpc } from '@appland/rpc';

import { RpcHandler } from '../rpc';
import { SearchResponse } from '../../fulltext/appmap-match';
import { search as searchAppMaps } from '../../fulltext/appmap-index';
import { SearchResponse } from '../explain/index/appmap-match';
import { search as searchAppMaps } from '../explain/index/appmap-index';
import searchSingleAppMap from '../../cmds/search/searchSingleAppMap';
import configuration, { AppMapDirectory } from '../configuration';
import buildIndexInTempDir from '../explain/build-index-in-temp-dir';
import { buildAppMapIndex } from '../../fulltext/appmap-index';
import buildIndexInTempDir from '../explain/index/build-index-in-temp-dir';
import { buildAppMapIndex } from '../explain/index/appmap-index';

export const DEFAULT_MAX_DIAGRAMS = 10;
export const DEFAULT_MAX_EVENTS_PER_DIAGRAM = 100;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { vol } from 'memfs';
import { readAppMapContent } from '../../../src/fulltext/appmap-index';
import { readAppMapContent } from '../../../src/rpc/explain/index/appmap-index';
import { Metadata } from '@appland/models';

jest.mock('fs/promises', () => require('memfs').promises);
Expand Down
4 changes: 2 additions & 2 deletions packages/cli/tests/unit/fulltext/appmap-index.search.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import UpToDate, { AppMapIndex } from '../../../src/lib/UpToDate';
import { PathLike } from 'fs';
import { join } from 'path';
import { FileIndex, FileSearchResult } from '@appland/search';
import { search } from '../../../src/fulltext/appmap-index';
import { SearchStats } from '../../../src/fulltext/appmap-match';
import { search } from '../../../src/rpc/explain/index/appmap-index';
import { SearchStats } from '../../../src/rpc/explain/index/appmap-match';

jest.mock('../../../src/utils');
jest.mock('../../../src/lib/UpToDate');
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ContextCollector } from '../../../../src/rpc/explain/collectContext';
import { ContextCollector } from '../../../../src/rpc/explain/collect-context';
import * as SearchContextCollector from '../../../../src/rpc/explain/SearchContextCollector';
import * as LocationContextCollector from '../../../../src/rpc/explain/LocationContextCollector';
import * as navie from '@appland/navie';
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/tests/unit/rpc/explain/EventCollector.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { join } from 'path';

import { textSearchResultToRpcSearchResult } from '../../../../src/rpc/explain/textSearchResultToRpcSearchResult';
import buildContext from '../../../../src/rpc/explain/buildContext';
import { SearchResponse as AppMapSearchResponse } from '../../../../src/fulltext/appmap-match';
import { SearchResponse as AppMapSearchResponse } from '../../../../src/rpc/explain/index/appmap-match';
import FindEvents, {
SearchResponse as EventSearchResponse,
} from '../../../../src/fulltext/FindEvents';
Expand Down
Loading

0 comments on commit 4545b12

Please sign in to comment.