From 10538dd8b52d117567b6c81e423963d381223abc Mon Sep 17 00:00:00 2001 From: Alvaro Sanchez-Leon Date: Wed, 31 Mar 2021 22:14:00 -0400 Subject: [PATCH] Improve include/exclude search in workspace Enhancements: * Search with 'Files to include' specifying a relative path e.g. './README.md' will only consider existing files from workspace roots e.g. '${ROOT1}/README.md', '${ROOT2}/README.md'. * 'Files to include' are limiting the search to specific folders e.g. './test' should not include '${ROOT}/dir/test'. * Include/Exclude strings (i.e. not resolving to relative paths) are converted to two globs in order to include results of a resulting file and include files under a resulting folder i.e. resolved globs are `**/string` and `**/string/*`. e.g. input file '.snapshot' shall include a file '.snapshot' or if it's a folder it should apply to all the files underneath it. * Absolute path(s) given as include patterns override the search in workspace to search under the given file/folder path(s). * Files opened in editor will search the contents in editor so the search will exclude these files when searching the file system. * Include / Exclude file strings starting with './' (linux) or '.\' (windows) will be applied as absolute patterns relative to the applicable search paths (even if they include glob patterns). 
Signed-off-by: Alvaro Sanchez-Leon --- ...search-in-workspace-result-tree-widget.tsx | 12 +- ...ep-search-in-workspace-server.slow-spec.ts | 220 +++++++++++++++++- .../ripgrep-search-in-workspace-server.ts | 141 +++++++++-- .../src/node/ripgrep-search-utils.ts | 123 ++++++++++ 4 files changed, 465 insertions(+), 31 deletions(-) create mode 100644 packages/search-in-workspace/src/node/ripgrep-search-utils.ts diff --git a/packages/search-in-workspace/src/browser/search-in-workspace-result-tree-widget.tsx b/packages/search-in-workspace/src/browser/search-in-workspace-result-tree-widget.tsx index b2e414c094765..95c2fb54fe93a 100644 --- a/packages/search-in-workspace/src/browser/search-in-workspace-result-tree-widget.tsx +++ b/packages/search-in-workspace/src/browser/search-in-workspace-result-tree-widget.tsx @@ -290,7 +290,8 @@ export class SearchInWorkspaceResultTreeWidget extends TreeWidget { } return workspaceRootUri.toString().concat(pattern.replace('./', '/')); } - return globalPrefix.concat(pattern); + + return pattern.startsWith('/') ? '**'.concat(pattern) : globalPrefix.concat(pattern); } /** @@ -517,13 +518,8 @@ export class SearchInWorkspaceResultTreeWidget extends TreeWidget { // Exclude files already covered by searching open editors. this.editorManager.all.forEach(e => { - const rootUri = this.workspaceService.getWorkspaceRootUri(e.editor.uri); - if (rootUri) { - // Exclude pattern beginning with './' works after the fix of #8469. - const { name, path } = this.filenameAndPath(e.editor.uri.toString(), rootUri.toString()); - const excludePath: string = path === '' ? './' + name : path + '/' + name; - searchOptions.exclude = (searchOptions.exclude) ? searchOptions.exclude.concat(excludePath) : [excludePath]; - } + const excludePath: string = e.editor.uri.path.toString(); + searchOptions.exclude = (searchOptions.exclude) ? searchOptions.exclude.concat(excludePath) : [excludePath]; }); // Reduce `maxResults` due to editor results. 
diff --git a/packages/search-in-workspace/src/node/ripgrep-search-in-workspace-server.slow-spec.ts b/packages/search-in-workspace/src/node/ripgrep-search-in-workspace-server.slow-spec.ts index 6338f45b4c87a..86b1251e58427 100644 --- a/packages/search-in-workspace/src/node/ripgrep-search-in-workspace-server.slow-spec.ts +++ b/packages/search-in-workspace/src/node/ripgrep-search-in-workspace-server.slow-spec.ts @@ -89,7 +89,11 @@ const getRootPathFromName = (name: string) => { glob: rootDirA, 'lots-of-matches': rootDirA, orange: rootDirB, - folderSubfolder: rootSubdirA + folderSubfolder: rootSubdirA, + 'orange/navel': `${rootDirA}`, + 'orange/hamlin': `${rootDirA}`, + 'test/test-spec.ts': `${rootDirA}`, + 'small/test/test-spec.ts': `${rootDirA}` }; return names[name]; }; @@ -128,9 +132,25 @@ aaa hello. x h3lo y hell0h3lllo hello1 `); - fs.mkdirSync(rootDirA + '/small'); + const smallDirPath = rootDirA + '/small'; + fs.mkdirSync(smallDirPath); createTestFile('small', 'A small file.\n'); + const copyrightLine = '\ + Copyright (C) 2021 and others.'; + fs.mkdirSync(smallDirPath + '/test'); + createTestFile('small/test/test-spec.ts', copyrightLine); + + fs.mkdirSync(rootDirA + '/test'); + createTestFile('test/test-spec.ts', copyrightLine); + + fs.mkdirSync(rootDirA + '/orange'); + createTestFile('orange/hamlin', '\ + Hamlin orange is one of our most cold-hardy sweet oranges. Grown since 1885'); + + createTestFile('orange/navel', '\ + Most well known orange type'); + if (!isWindows) { createTestFile('file:with:some:colons', `\ Are you looking for this: --foobar? 
@@ -681,6 +701,85 @@ describe('ripgrep-search-in-workspace-server', function (): void { ripgrepServer.search(pattern, [rootDirAUri], { include: ['*.txt'], matchWholeWord: true }); }); + it('should search in a given file by relative path', done => { + const pattern = 'carrots'; + + const client = new ResultAccumulator(() => { + const expected: SearchInWorkspaceExpectation[] = [ + { root: rootDirAUri, fileUri: 'potatoes', line: 1, character: 18, length: pattern.length, lineText: '' } + ]; + + compareSearchResults(expected, client.results); + done(); + }); + ripgrepServer.setClient(client); + ripgrepServer.search(pattern, [rootDirAUri], { include: ['./potatoes'], matchWholeWord: true }); + }); + + it('should only apply to sub-folders of given include', done => { + const pattern = 'Copyright'; + + const client = new ResultAccumulator(() => { + const expected: SearchInWorkspaceExpectation[] = [ + { root: rootDirAUri, fileUri: 'test/test-spec.ts', line: 1, character: 5, length: pattern.length, lineText: '' } + ]; + + compareSearchResults(expected, client.results); + done(); + }); + ripgrepServer.setClient(client); + // Matching only the top 'test' folder and not any other 'test' subfolder + ripgrepServer.search(pattern, [rootDirAUri], { include: ['./test'], matchWholeWord: true }); + }); + + it('should apply to all sub-folders of not relative pattern', done => { + const pattern = 'Copyright'; + + const client = new ResultAccumulator(() => { + const expected: SearchInWorkspaceExpectation[] = [ + { root: rootDirAUri, fileUri: 'small/test/test-spec.ts', line: 1, character: 5, length: pattern.length, lineText: '' }, + { root: rootDirAUri, fileUri: 'test/test-spec.ts', line: 1, character: 5, length: pattern.length, lineText: '' } + ]; + + compareSearchResults(expected, client.results); + done(); + }); + ripgrepServer.setClient(client); + // Matching only the top 'test' folder and not any other 'test' subfolder + ripgrepServer.search(pattern, [rootDirAUri], { include: 
['test'], matchWholeWord: true }); + }); + + it('should consider "include" string as a file', done => { + const pattern = 'slightly'; + + const client = new ResultAccumulator(() => { + const expected: SearchInWorkspaceExpectation[] = [ + { root: rootDirAUri, fileUri: 'orange', line: 1, character: 27, length: pattern.length, lineText: '' } + ]; + + compareSearchResults(expected, client.results); + done(); + }); + ripgrepServer.setClient(client); + ripgrepServer.search(pattern, [rootDirBUri], { include: ['orange'], matchWholeWord: true }); + }); + + it('should consider "include" string as a folder', done => { + const pattern = 'Most'; + + const client = new ResultAccumulator(() => { + const expected: SearchInWorkspaceExpectation[] = [ + { root: rootDirAUri, fileUri: 'orange/navel', line: 1, character: 5, length: pattern.length, lineText: '' }, + { root: rootDirAUri, fileUri: 'orange/hamlin', line: 1, character: 33, length: pattern.length, lineText: '' } + ]; + + compareSearchResults(expected, client.results); + done(); + }); + ripgrepServer.setClient(client); + ripgrepServer.search(pattern, [rootDirAUri], { include: ['orange'], matchWholeWord: true }); + }); + it('should return 1 result when searching for "test" while ignoring all ".txt" files', done => { const pattern = 'test'; @@ -848,3 +947,120 @@ describe('ripgrep-search-in-workspace-server', function (): void { } }); }); + +describe('#resolvePatternToPathMap', function (): void { + this.timeout(10000); + it('should not resolve paths from a not absolute / relative pattern', function (): void { + const pattern = 'carrots'; + const options = { include: [pattern] }; + const searchPaths = ripgrepServer['resolveSearchPathsFromIncludes']([rootDirA], options); + // Same root directory + expect(searchPaths.length).equal(1); + expect(searchPaths[0]).equal(rootDirA); + + // Pattern is unchanged + expect(options.include.length).equal(1); + expect(options.include[0]).equals(pattern); + }); + + it('should resolve pattern to 
path for relative filename', function (): void { + const filename = 'carrots'; + const pattern = `./${filename}`; + checkResolvedPathForPattern(pattern, path.join(rootDirA, filename)); + }); + + it('should resolve relative pattern with sub-folders glob', function (): void { + const filename = 'carrots'; + const pattern = `./${filename}/**`; + checkResolvedPathForPattern(pattern, path.join(rootDirA, filename)); + }); + + it('should resolve absolute path pattern', function (): void { + const pattern = `${rootDirA}/carrots`; + checkResolvedPathForPattern(pattern, pattern); + }); +}); + +describe('#patternToGlobCLIArguments', function (): void { + this.timeout(10000); + + it('should resolve path to glob - filename', function (): void { + [true, false].forEach(excludeFlag => { + const excludePrefix = excludeFlag ? '!' : ''; + const filename = 'carrots'; + const expected = [ + `--glob=${excludePrefix}**/${filename}`, + `--glob=${excludePrefix}**/${filename}/*` + ]; + + const actual = ripgrepServer['patternToGlobCLIArguments'](filename, excludeFlag); + expect(expected).to.have.deep.members(actual); + }); + }); + + it('should resolve path to glob - glob prefixed folder', function (): void { + [true, false].forEach(excludeFlag => { + const excludePrefix = excludeFlag ? '!' : ''; + const filename = 'carrots'; + const inputPath = `**/${filename}/`; + const expected = [ + `--glob=${excludePrefix}**/${filename}/`, + `--glob=${excludePrefix}**/${filename}/*` + ]; + + const actual = ripgrepServer['patternToGlobCLIArguments'](inputPath, excludeFlag); + expect(expected).to.have.deep.members(actual); + }); + }); + + it('should resolve path to glob - path segment', function (): void { + [true, false].forEach(excludeFlag => { + const excludePrefix = excludeFlag ? '!' 
: ''; + const filename = 'carrots'; + const inputPath = `/${filename}`; + const expected = [ + `--glob=${excludePrefix}**/${filename}`, + `--glob=${excludePrefix}**/${filename}/*` + ]; + + const actual = ripgrepServer['patternToGlobCLIArguments'](inputPath, excludeFlag); + expect(expected).to.have.deep.members(actual); + }); + }); + + it('should resolve path to glob - already a glob', function (): void { + [true, false].forEach(excludeFlag => { + const excludePrefix = excludeFlag ? '!' : ''; + const filename = 'carrots'; + const inputPath = `${filename}/**/*`; + const expected = [ + `--glob=${excludePrefix}**/${filename}/**/*`, + ]; + + const actual = ripgrepServer['patternToGlobCLIArguments'](inputPath, excludeFlag); + expect(expected).to.have.deep.members(actual); + }); + }); + + it('should resolve path to glob - path segment glob suffixed', function (): void { + [true, false].forEach(excludeFlag => { + const excludePrefix = excludeFlag ? '!' : ''; + const filename = 'carrots'; + const inputPath = `/${filename}/**/*`; + const expected = [ + `--glob=${excludePrefix}**/${filename}/**/*`, + ]; + + const actual = ripgrepServer['patternToGlobCLIArguments'](inputPath, excludeFlag); + expect(expected).to.have.deep.members(actual); + }); + }); +}); + +function checkResolvedPathForPattern(pattern: string, expectedPath: string): void { + const options = {include: [pattern]}; + const searchPaths = ripgrepServer['resolveSearchPathsFromIncludes']([rootDirA], options); + expect(searchPaths.length).equal(1); + expect(options.include.length).equals(0); + expect(searchPaths[0]).equal(expectedPath); +} diff --git a/packages/search-in-workspace/src/node/ripgrep-search-in-workspace-server.ts b/packages/search-in-workspace/src/node/ripgrep-search-in-workspace-server.ts index 21a410bb6790d..4b9425e8f101a 100644 --- a/packages/search-in-workspace/src/node/ripgrep-search-in-workspace-server.ts +++ b/packages/search-in-workspace/src/node/ripgrep-search-in-workspace-server.ts @@ -20,6 
+20,7 @@ import { FileUri } from '@theia/core/lib/node/file-uri'; import URI from '@theia/core/lib/common/uri'; import { inject, injectable } from '@theia/core/shared/inversify'; import { SearchInWorkspaceServer, SearchInWorkspaceOptions, SearchInWorkspaceResult, SearchInWorkspaceClient, LinePreview } from '../common/search-in-workspace-interface'; +import { RipgrepSearchUtils } from './ripgrep-search-utils'; export const RgPath = Symbol('RgPath'); @@ -94,45 +95,96 @@ export class RipgrepSearchInWorkspaceServer implements SearchInWorkspaceServer { } protected getArgs(options?: SearchInWorkspaceOptions): string[] { - const args = ['--hidden', '--json']; - args.push(options && options.matchCase ? '--case-sensitive' : '--ignore-case'); + const args = new Set(); + + const appendGlobArgs = (rawPatterns: string[], exclude: boolean) => { + rawPatterns.forEach(rawPattern => { + if (rawPattern !== '') { + const globArguments = this.patternToGlobCLIArguments(rawPattern, exclude); + globArguments.forEach(arg => args.add(arg)); + } + }); + }; + + args.add('--hidden'); + args.add('--json'); + + if (options && options.matchCase) { + args.add('--case-sensitive'); + } else { + args.add('--ignore-case'); + } + if (options && options.includeIgnored) { - args.push('--no-ignore'); + args.add('--no-ignore'); } if (options && options.maxFileSize) { - args.push('--max-filesize=' + options.maxFileSize.trim()); + args.add('--max-filesize=' + options.maxFileSize.trim()); } else { - args.push('--max-filesize=20M'); + args.add('--max-filesize=20M'); } + if (options && options.include) { - for (const include of options.include) { - if (include !== '') { - args.push('--glob=**/' + include); - } - } + appendGlobArgs(options.include, false); } + if (options && options.exclude) { - for (const exclude of options.exclude) { - if (exclude !== '') { - args.push('--glob=!**/' + exclude); - } - } + appendGlobArgs(options.exclude, true); } + if (options && options.useRegExp || options && 
options.matchWholeWord) { - args.push('--regexp'); + args.add('--regexp'); } else { - args.push('--fixed-strings'); - args.push('--'); + args.add('--fixed-strings'); + args.add('--'); } - return args; + + return Array.from(args); } - // Search for the string WHAT in directories ROOTURIS. Return the assigned search id. + /** + * Transforms a given file pattern to 'ripgrep' glob CLI arguments. + */ + protected patternToGlobCLIArguments(pattern: string, exclude: boolean): string[] { + const globCommandArgument = '--glob='; + const excludeChar = exclude ? '!' : ''; + const subDirGlobPattern = '**/'; + + const subDirGlobPrefix = pattern.startsWith('/') ? '**' : subDirGlobPattern; + const updatedPattern = pattern.startsWith(subDirGlobPattern) ? pattern : `${subDirGlobPrefix}${pattern}`; + + const globArgument = `${globCommandArgument}${excludeChar}${updatedPattern}`; + + const globArgumentsArray = [globArgument]; + if (!globArgument.endsWith('*')) { + // Add a generic glob CLI argument entry to include files inside a given directory. + const suffix = globArgument.endsWith('/') ? '*' : '/*'; + globArgumentsArray.push(`${globArgument}${suffix}`); + } + + return globArgumentsArray; + }; + + /** + * By default, sets the search directories for the string WHAT to the provided ROOTURIS directories + * and returns the assigned search id. + * + * The include / exclude (options in SearchInWorkspaceOptions) are lists of patterns for files to + * include / exclude during search (glob characters are allowed). + * + * include patterns successfully recognized as absolute paths will override the default search and set + * the search directories to the ones provided as includes. + * Relative paths are allowed, the application will attempt to translate them to valid absolute paths + * based on the applicable search directories. + */ search(what: string, rootUris: string[], opts?: SearchInWorkspaceOptions): Promise { // Start the rg process. 
Use --vimgrep to get one result per // line, --color=always to get color control characters that // we'll use to parse the lines. const searchId = this.nextSearchId++; + const rootPaths = rootUris.map(root => FileUri.fsPath(root)); + const searchPaths: string[] = this.resolveSearchPathsFromIncludes(rootPaths, opts); + this.includesExcludesToAbsolute(searchPaths, opts); const rgArgs = this.getArgs(opts); // if we use matchWholeWord we use regExp internally, // so, we need to escape regexp characters if we actually not set regexp true in UI. @@ -146,7 +198,7 @@ export class RipgrepSearchInWorkspaceServer implements SearchInWorkspaceServer { } } - const args = [...rgArgs, what].concat(rootUris.map(root => FileUri.fsPath(root))); + const args = [...rgArgs, what, ...searchPaths]; const processOptions: RawProcessOptions = { command: this.rgPath, args @@ -290,6 +342,53 @@ export class RipgrepSearchInWorkspaceServer implements SearchInWorkspaceServer { return Promise.resolve(searchId); } + /** + * The default search paths are set to be the root paths associated to a workspace + * however the search scope can be further refined with the include paths available in the search options. + * This method will replace the searching paths to the ones specified in the 'include' options but as long + * as the 'include' paths can be successfully validated as existing. + * + * Therefore the returned array of paths can be either the workspace root paths or a set of validated paths + * derived from the include options which can be used to perform the search. + * + * Any pattern that resulted in a valid search path will be removed from the 'include' list as it is + * provided as an equivalent search path instead. 
+ */ + protected resolveSearchPathsFromIncludes(rootPaths: string[], opts: SearchInWorkspaceOptions | undefined): string[] { + if (!opts || !opts.include) { + return rootPaths; + } + + const { convertedPatterns, resolvedPaths } = RipgrepSearchUtils.resolvePatternsToPaths(opts.include, rootPaths); + + // Remove file patterns that were successfully translated to search paths. + opts.include = opts.include.filter(item => !convertedPatterns.has(item)); + + return resolvedPaths.size > 0 ? Array.from(resolvedPaths) : rootPaths; + } + + /** + * Transform include/exclude option patterns from relative patterns to absolute patterns. + * E.g. './abc/foo.*' to '${root}/abc/foo.*', the transformation does not validate the + * pattern against the file system as glob suffixes remain. + */ + protected includesExcludesToAbsolute(searchPaths: string[], opts: SearchInWorkspaceOptions | undefined): void { + [true, false].forEach(isInclude => { + const patterns = isInclude ? opts?.include : opts?.exclude; + if (!patterns) { + return; + } + + const updatedPatterns = RipgrepSearchUtils.replaceRelativeToAbsolute(patterns, searchPaths); + + if (isInclude) { + opts!.include = updatedPatterns; + } else { + opts!.exclude = updatedPatterns; + } + }); + } + /** * Returns the root folder uri that a file belongs to. * In case that a file belongs to more than one root folders, returns the root folder that is closest to the file. diff --git a/packages/search-in-workspace/src/node/ripgrep-search-utils.ts b/packages/search-in-workspace/src/node/ripgrep-search-utils.ts new file mode 100644 index 0000000000000..0b6e036a4b472 --- /dev/null +++ b/packages/search-in-workspace/src/node/ripgrep-search-utils.ts @@ -0,0 +1,123 @@ +/******************************************************************************** + * Copyright (C) 2021 Ericsson and others. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v. 
2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * This Source Code may also be made available under the following Secondary + * Licenses when the conditions for such availability set forth in the Eclipse + * Public License v. 2.0 are satisfied: GNU General Public License, version 2 + * with the GNU Classpath Exception which is available at + * https://www.gnu.org/software/classpath/license.html. + * + * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 + ********************************************************************************/ +import * as fs from '@theia/core/shared/fs-extra'; +import * as path from 'path'; + +export namespace RipgrepSearchUtils { + /** + * Attempts to resolve valid file paths from a given list of patterns. + * The given search paths are used to try resolving relative path patterns to an absolute path. + * The resulting object will include two sets. + * + * The first set includes all the patterns that were successfully converted to at least one file existing + * in the file system. + * + * The second set includes all validated paths derived from joining search paths with patterns. + */ + export function resolvePatternsToPaths(patterns: string[], searchPaths: string[]): { convertedPatterns: Set, resolvedPaths: Set } { + const convertedPatterns = new Set(); + const resolvedPaths = new Set(); + + patterns.forEach(pattern => { + searchPaths.forEach(root => { + const foundPath = resolveFolderFromGlob(root, pattern); + + if (foundPath) { + convertedPatterns.add(pattern); + resolvedPaths.add(foundPath); + } + }); + }); + + return { convertedPatterns, resolvedPaths }; + } + + /** + * Transforms relative patterns to absolute paths, one for each given search path. + * The resulting paths are not validated in the file system as the pattern keeps glob information. 
+ * + * @returns The resulting list may be larger than the received patterns as a relative pattern may + * resolve to multiple absolute patterns upto the number of search paths. + */ + export function replaceRelativeToAbsolute(patterns: string[], searchPaths: string[]): string[] { + const processedPatterns = new Set(); + + patterns.forEach(pattern => { + searchPaths.forEach(root => { + processedPatterns.add(relativeToAbsolutePattern(root, pattern)); + }); + }); + + return Array.from(processedPatterns); + } + + /** + * Joins the given root and pattern to form an absolute path + * as long as the pattern is in relative form. + * E.g. './foo' becomes '${root}/foo' + */ + function relativeToAbsolutePattern(root: string, pattern: string): string { + if (!isRelativeToBaseDirectory(pattern)) { + // No need to convert to absolute + return pattern; + } + return path.join(root, pattern); + } + + /** + * Checks if the format of a given path represents a relative path within the base directory + */ + function isRelativeToBaseDirectory(filePath: string): boolean { + return filePath.replace(/\\/g, '/').startsWith('./'); + } + + /** + * Attempts to build a valid absolute file or directory from the given pattern and root folder. + * e.g. /a/b/c/foo/** to /a/b/c/foo, or './foo/**' to '${root}/foo'. + * + * @returns the valid path if found existing in the file system. + */ + function resolveFolderFromGlob(root: string, pattern: string): string | undefined { + const patternBase = stripGlobSuffix(pattern); + + if (!path.isAbsolute(patternBase) && !isRelativeToBaseDirectory(patternBase)) { + // The pattern is not referring to a single file or folder, i.e. not to be converted + return undefined; + } + + const targetPath = path.isAbsolute(patternBase) ? patternBase : path.join(root, patternBase); + + if (fs.existsSync(targetPath)) { + return targetPath; + } + + return undefined; + } + + /** + * Removes a glob suffix from a given pattern (e.g. /a/b/c/**) + * to a directory path (/a/b/c). 
+ * + * @returns the path without the glob suffix, + * else returns the original pattern. + */ + function stripGlobSuffix(pattern: string): string { + const pathParsed = path.parse(pattern); + const suffix = pathParsed.base; + + return suffix === '**' ? pathParsed.dir : pattern; + } +}