Review notes (free text ahead of the first "diff --git" header; patch tools skip leading text):
- This patch had lost its line breaks. They are restored below. Indentation could not be recovered and
  is reconstructed with two spaces per level, so compare context lines with the working tree before applying.
- lib/commands/peptfilter.ts differs from the submitted revision: the option flags have value placeholders
  again (the placeholder names are a reconstruction), the length options reject values that are not
  non-negative numbers, run() reads the options once, skips empty writes and treats "\r\n" as one line break,
  and the class is documented. Its hunk length is updated; its "index" blob id still names the submitted revision.
- All other hunks are unchanged apart from the restored line structure.

diff --git a/bin/peptfilter.ts b/bin/peptfilter.ts
new file mode 100755
index 00000000..007dfdba
--- /dev/null
+++ b/bin/peptfilter.ts
@@ -0,0 +1,6 @@
+#!/usr/bin/env node
+
+import { Peptfilter } from '../lib/commands/peptfilter.js';
+
+const command = new Peptfilter();
+command.run();
diff --git a/jest.config.ts b/jest.config.ts
index 9627c0f6..9b51f878 100644
--- a/jest.config.ts
+++ b/jest.config.ts
@@ -104,7 +104,7 @@ const config: Config = {
   // notifyMode: "failure-change",
 
   // A preset that is used as a base for Jest's configuration
-  // preset: undefined,
+  preset: 'ts-jest/presets/default-esm',
 
   // Run tests from one or more projects
   // projects: undefined,
diff --git a/lib/commands/base_command.ts b/lib/commands/base_command.ts
index 90897abe..35c7b795 100644
--- a/lib/commands/base_command.ts
+++ b/lib/commands/base_command.ts
@@ -1,5 +1,5 @@
 import { Command } from "commander";
-import { version } from '../../package.json';
+import { readFileSync } from "fs";
 
 /**
  * This is a base class which provides a common interface for all commands.
@@ -11,8 +11,10 @@ import { version } from '../../package.json';
 export abstract class BaseCommand {
   public program: Command;
   args: string[] | undefined;
+  version: string;
 
   constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean, args?: string[] }) {
+    this.version = JSON.parse(readFileSync(new URL("../../package.json", import.meta.url), "utf8")).version;
     this.program = this.create(options);
     this.args = options?.args;
   }
@@ -37,8 +39,7 @@ export abstract class BaseCommand {
         writeErr: () => { }
       });
     }
-
-    program.version(version);
+    program.version(this.version);
 
     return program;
   }
diff --git a/lib/commands/peptfilter.ts b/lib/commands/peptfilter.ts
new file mode 100644
index 00000000..f8435dc4
--- /dev/null
+++ b/lib/commands/peptfilter.ts
@@ -0,0 +1,109 @@
+import { InvalidArgumentError } from 'commander';
+import { createInterface } from 'node:readline';
+import { BaseCommand } from './base_command.js';
+
+/** Number of input lines that are read between two writes to standard output. */
+const FLUSH_INTERVAL = 1000;
+
+/**
+ * Copies peptides from standard input to standard output, retaining only the lines that satisfy the length and
+ * amino acid criteria given on the command line. Lines starting with ">" (FASTA headers) are always retained.
+ */
+export class Peptfilter extends BaseCommand {
+
+  readonly description = `The peptfilter command filters a list of peptides according to specific criteria. The command expects a list of peptides that are passed to standard input.
+
+The input should have one peptide per line. FASTA headers are preserved in the output, so that peptides remain bundled.`;
+
+  constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean, args?: string[] }) {
+    super(options);
+
+    this.program
+      .summary("Filter peptides based on specific criteria.")
+      .description(this.description)
+      .option("--minlen <length>", "only retain peptides having at least this many amino acids", (d) => Peptfilter.parseLength(d), 5)
+      .option("--maxlen <length>", "only retain peptides having at most this many amino acids", (d) => Peptfilter.parseLength(d), 50)
+      .option("-l, --lacks <amino_acids>", "only retain peptides that lack all of the specified amino acids", (d) => d.split(""))
+      .option("-c, --contains <amino_acids>", "only retain peptides that contain all of the specified amino acids", (d) => d.split(""));
+  }
+
+  /**
+   * Reads standard input line by line and writes every retained line to standard output.
+   *
+   * Performance note: this implementation takes 4 seconds to run on swissprot. It can be made faster by using line events instead of
+   * async iterators. This alternative implementation runs in 2.5 seconds. However, I decided that the async iterator implementation is
+   * both more readable and more in line with the implementation of the other commands.
+   */
+  async run(): Promise<void> {
+    this.parseArguments();
+    const options = this.program.opts();
+    const minLen: number = options.minlen;
+    const maxLen: number = options.maxlen;
+    const lacks: string[] = options.lacks ?? [];
+    const contains: string[] = options.contains ?? [];
+
+    // buffering output makes a big difference in performance
+    let buffer: string[] = [];
+    const flush = (): void => {
+      if (buffer.length === 0) {
+        return; // nothing was retained since the previous write
+      }
+      buffer.push(""); // add a newline at the end of the buffer without additional string copy
+      process.stdout.write(buffer.join("\n"));
+      buffer = [];
+    };
+
+    let linesRead = 0;
+    // crlfDelay: Infinity makes "\r\n" always count as one line break, however the two characters arrive
+    for await (const line of createInterface({ input: process.stdin, crlfDelay: Infinity })) {
+      linesRead++;
+      const retained = line.startsWith(">") // pass through FASTA headers
+        || (Peptfilter.checkLength(line, minLen, maxLen)
+          && Peptfilter.checkLacks(line, lacks)
+          && Peptfilter.checkContains(line, contains));
+      if (retained) {
+        buffer.push(line);
+      }
+      if (linesRead % FLUSH_INTERVAL === 0) {
+        flush();
+      }
+    }
+
+    flush();
+  }
+
+  /**
+   * Checks whether the number of characters in a line lies within the inclusive range [minLen, maxlen].
+   */
+  static checkLength(line: string, minLen: number, maxlen: number): boolean {
+    return line.length >= minLen && line.length <= maxlen;
+  }
+
+  /**
+   * Checks whether a line contains none of the given amino acids. An empty list always passes.
+   */
+  static checkLacks(line: string, lacks: string[]): boolean {
+    return lacks.every((aa: string) => !line.includes(aa));
+  }
+
+  /**
+   * Checks whether a line contains every one of the given amino acids. An empty list always passes.
+   */
+  static checkContains(line: string, contains: string[]): boolean {
+    return contains.every((aa: string) => line.includes(aa));
+  }
+
+  /**
+   * Converts the value of a length option to a number using parseInt (base 10).
+   *
+   * @throws InvalidArgumentError when parseInt yields NaN or a negative number, so that commander reports a
+   * usage error instead of the filter silently rejecting every peptide (comparisons with NaN are always false).
+   */
+  private static parseLength(value: string): number {
+    const length = Number.parseInt(value, 10);
+    if (Number.isNaN(length) || length < 0) {
+      throw new InvalidArgumentError("Expected a non-negative integer.");
+    }
+    return length;
+  }
+}
diff --git a/package.json b/package.json
index 71171b87..ddaa71bc 100644
--- a/package.json
+++ b/package.json
@@ -8,13 +8,15 @@
   "private": false,
   "type": "module",
   "bin": {
+    "peptfilter": "./bin/peptfilter.js",
     "uniprot": "./bin/uniprot.js"
   },
   "scripts": {
     "build": "yarn run tsc",
     "lint": "yarn run eslint",
-    "test": "yarn run jest",
+    "test": "NODE_OPTIONS='--experimental-vm-modules --no-warnings' yarn run jest",
     "typecheck": "yarn tsc --skipLibCheck --noEmit",
+    "peptfilter": "yarn run tsx bin/peptfilter.ts",
     "uniprot": "yarn run tsx bin/uniprot.ts"
   },
   "dependencies": {
diff --git a/tests/commands/peptfilter.test.ts b/tests/commands/peptfilter.test.ts
new file mode 100644
index 00000000..0318fde2
--- /dev/null
+++ b/tests/commands/peptfilter.test.ts
@@ -0,0 +1,99 @@
+import { Peptfilter } from '../../lib/commands/peptfilter';
+import { jest } from '@jest/globals';
+import * as mock from 'mock-stdin';
+
+let output: string[];
+let error: string[];
+// eslint-disable-next-line @typescript-eslint/no-unused-vars
+const writeSpy = jest
+  .spyOn(process.stdout, "write")
+  .mockImplementation((data: unknown) => { output.push(data as string); return true; });
+const errorSpy = jest
+  .spyOn(process.stderr, "write")
+  .mockImplementation((data: unknown) => { error.push(data as string); return true; });
+
+beforeEach(() => {
+  output = [];
+  error = [];
+});
+
+test('test length filter', async () => {
+  // min length
+  expect(Peptfilter.checkLength('AALER', 4, 10)).toBe(true);
+  expect(Peptfilter.checkLength('AALER', 5, 10)).toBe(true);
+  expect(Peptfilter.checkLength('AALER', 6, 10)).toBe(false);
+
+  // max length
+  expect(Peptfilter.checkLength('AALER', 1, 4)).toBe(false);
+  expect(Peptfilter.checkLength('AALER', 1, 5)).toBe(true);
+  expect(Peptfilter.checkLength('AALER', 1, 6)).toBe(true);
+});
+
+test('test lacks filter', async () => {
+  expect(Peptfilter.checkLacks('AALER', ''.split(""))).toBe(true);
+  expect(Peptfilter.checkLacks('AALER', 'BCD'.split(""))).toBe(true);
+  expect(Peptfilter.checkLacks('AALER', 'A'.split(""))).toBe(false);
+  expect(Peptfilter.checkLacks('AALER', 'AE'.split(""))).toBe(false);
+});
+
+test('test contains filter', async () => {
+  expect(Peptfilter.checkContains('AALER', ''.split(""))).toBe(true);
+  expect(Peptfilter.checkContains('AALER', 'A'.split(""))).toBe(true);
+  expect(Peptfilter.checkContains('AALER', 'AE'.split(""))).toBe(true);
+  expect(Peptfilter.checkContains('AALER', 'BCD'.split(""))).toBe(false);
+  expect(Peptfilter.checkContains('AALER', 'AB'.split(""))).toBe(false);
+});
+
+test('test default filter from stdin', async () => {
+  const stdin = mock.stdin();
+
+  const command = new Peptfilter();
+  const run = command.run();
+
+  stdin.send("AAAA\n");
+  stdin.send("AAAAA\n");
+  stdin.end();
+
+  await run;
+
+  expect(errorSpy).toHaveBeenCalledTimes(0);
+  expect(output.join("").trimEnd().split("\n").length).toBe(1);
+});
+
+test('test if it passes fasta from stdin', async () => {
+  const stdin = mock.stdin();
+
+  const command = new Peptfilter();
+  const run = command.run();
+
+  stdin.send(">AA\n");
+  stdin.send("AAA\n");
+  stdin.end();
+
+  await run;
+
+  expect(errorSpy).toHaveBeenCalledTimes(0);
+  expect(output.join("").trimEnd().split("\n").length).toBe(1);
+  expect(output[0]).toBe(">AA\n");
+});
+
+test('test complex example from stdin', async () => {
+  const stdin = mock.stdin();
+
+  const command = new Peptfilter({ args: ["--minlen", "4", "--maxlen", "10", "--lacks", "B", "--contains", "A"] });
+  const run = command.run();
+
+  stdin.send("A\n");
+  stdin.send("AAAAAAAAAAA\n");
+  stdin.send("AAAAB\n");
+  stdin.send("BBBBB\n");
+  stdin.send("CCCCC\n");
+  stdin.send("CCCCCA\n");
+  stdin.end();
+
+  await run;
+
+  expect(errorSpy).toHaveBeenCalledTimes(0);
+  expect(output.join("").trimEnd().split("\n").length).toBe(1);
+  expect(output[0]).toBe("CCCCCA\n");
+});
diff --git a/tests/commands/uniprot.test.ts b/tests/commands/uniprot.test.ts
index ca72837b..59ba4f7f 100644
--- a/tests/commands/uniprot.test.ts
+++ b/tests/commands/uniprot.test.ts
@@ -1,4 +1,5 @@
 import { Uniprot } from '../../lib/commands/uniprot';
+import { jest } from '@jest/globals';
 import * as mock from 'mock-stdin';
 
 let output: string[];