Skip to content

Commit

Permalink
Merge pull request #175 from unipept/next-peptfilter
Browse files Browse the repository at this point in the history
Port peptfilter to typescript
  • Loading branch information
bmesuere authored Jun 21, 2024
2 parents b0c9868 + eb8fb5d commit b071301
Show file tree
Hide file tree
Showing 7 changed files with 181 additions and 5 deletions.
6 changes: 6 additions & 0 deletions bin/peptfilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env node

import { Peptfilter } from '../lib/commands/peptfilter.js';

// Build the peptfilter command and start it.
// run() is async, so its promise must not be left floating: a rejection is
// reported on stderr and turned into a non-zero exit code explicitly instead of
// depending on the runtime's default unhandled-rejection behaviour.
const command = new Peptfilter();
command.run().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
2 changes: 1 addition & 1 deletion jest.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ const config: Config = {
// notifyMode: "failure-change",

// A preset that is used as a base for Jest's configuration
// preset: undefined,
preset: 'ts-jest/presets/default-esm',

// Run tests from one or more projects
// projects: undefined,
Expand Down
7 changes: 4 additions & 3 deletions lib/commands/base_command.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Command } from "commander";
import { version } from '../../package.json';
import { readFileSync } from "fs";

/**
* This is a base class which provides a common interface for all commands.
Expand All @@ -11,8 +11,10 @@ import { version } from '../../package.json';
export abstract class BaseCommand {
public program: Command;
args: string[] | undefined;
version: string;

/**
 * Reads the package version, builds the commander program and remembers the
 * optional argument list.
 *
 * @param options Optional settings; forwarded unchanged to `create()`. `options.args`
 *                is additionally stored on the instance.
 */
constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean, args?: string[] }) {
  // package.json is located relative to this module's own URL and parsed at runtime.
  const manifestUrl = new URL("../../package.json", import.meta.url);
  const manifest = JSON.parse(readFileSync(manifestUrl, "utf8"));

  // The version is assigned before the program is created because it is handed to
  // program.version(...) while the program is being set up (presumably inside create()).
  this.version = manifest.version;
  this.program = this.create(options);
  this.args = options?.args;
}
Expand All @@ -37,8 +39,7 @@ export abstract class BaseCommand {
writeErr: () => { }
});
}

program.version(version);
program.version(this.version);

return program;
}
Expand Down
67 changes: 67 additions & 0 deletions lib/commands/peptfilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import { createInterface } from 'node:readline';
import { BaseCommand } from './base_command.js';

/**
 * Command that reads peptides from standard input (one per line) and writes to
 * standard output only those that satisfy the configured length and amino acid
 * composition criteria. Lines starting with ">" (FASTA headers) are always kept.
 */
export class Peptfilter extends BaseCommand {

  readonly description = `The peptfilter command filters a list of peptides according to specific criteria. The command expects a list of peptides that are passed to standard input.
The input should have one peptide per line. FASTA headers are preserved in the output, so that peptides remain bundled.`;

  /**
   * Registers the summary, description and the four filter options on the program.
   *
   * @param options Forwarded unchanged to the BaseCommand constructor.
   */
  constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean, args?: string[] }) {
    super(options);

    // Option value parsers: lengths are read as base-10 integers, amino acid
    // lists are split into their individual characters.
    const toLength = (raw: string) => parseInt(raw, 10);
    const toResidues = (raw: string) => raw.split("");

    this.program
      .summary("Filter peptides based on specific criteria.")
      .description(this.description)
      .option("--minlen <length>", "only retain peptides having at least this many amino acids", toLength, 5)
      .option("--maxlen <length>", "only retain peptides having at most this many amino acids", toLength, 50)
      .option("-l, --lacks <amino acids>", "only retain peptides that lack all of the specified amino acids", toResidues)
      .option("-c, --contains <amino acids>", "only retain peptides that contain all of the specified amino acids", toResidues);
  }

  /**
   * Parses the arguments, then streams standard input line by line and writes the
   * retained lines to standard output.
   *
   * Performance note: this async-iterator implementation takes about 4 seconds on
   * swissprot, whereas a variant based on line events runs in about 2.5 seconds.
   * The async iterator was kept on purpose because it is more readable and more
   * in line with the implementation of the other commands.
   */
  async run(): Promise<void> {
    this.parseArguments();

    const options = this.program.opts();
    const minLen = options.minlen;
    const maxLen = options.maxlen;
    const lacks = options.lacks || [];
    const contains = options.contains || [];

    // Output is buffered because that makes a big difference in performance.
    let pending: string[] = [];
    const flush = () => {
      // The trailing empty element makes join() end the chunk with a newline
      // without an additional string copy.
      pending.push("");
      process.stdout.write(pending.join("\n"));
      pending = [];
    };

    let seen = 0;
    const lines = createInterface({ input: process.stdin });
    for await (const line of lines) {
      seen += 1;

      // FASTA headers pass through untouched; every other line has to satisfy all filters.
      const keep = line.startsWith(">")
        || (Peptfilter.checkLength(line, minLen, maxLen)
          && Peptfilter.checkLacks(line, lacks)
          && Peptfilter.checkContains(line, contains));
      if (keep) {
        pending.push(line);
      }

      // Flush once per 1000 input lines.
      if (seen % 1000 === 0) {
        flush();
      }
    }

    // Write whatever is left after the last full batch.
    flush();
  }

  /**
   * @param line   The peptide to check.
   * @param minLen Smallest accepted length (inclusive).
   * @param maxlen Largest accepted length (inclusive).
   * @returns true when the length of `line` lies within [minLen, maxlen].
   */
  static checkLength(line: string, minLen: number, maxlen: number): boolean {
    const size = line.length;
    return minLen <= size && size <= maxlen;
  }

  /**
   * @param line  The peptide to check.
   * @param lacks Amino acids that must not occur in the peptide.
   * @returns true when none of `lacks` occurs in `line` (always true for an empty list).
   */
  static checkLacks(line: string, lacks: string[]): boolean {
    return !lacks.some((aa: string) => line.includes(aa));
  }

  /**
   * @param line     The peptide to check.
   * @param contains Amino acids that must all occur in the peptide.
   * @returns true when every entry of `contains` occurs in `line` (always true for an empty list).
   */
  static checkContains(line: string, contains: string[]): boolean {
    for (const aa of contains) {
      if (!line.includes(aa)) {
        return false;
      }
    }
    return true;
  }
}
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
"private": false,
"type": "module",
"bin": {
"peptfilter": "./bin/peptfilter.js",
"uniprot": "./bin/uniprot.js"
},
"scripts": {
"build": "yarn run tsc",
"lint": "yarn run eslint",
"test": "yarn run jest",
"test": "NODE_OPTIONS='--experimental-vm-modules --no-warnings' yarn run jest",
"typecheck": "yarn tsc --skipLibCheck --noEmit",
"peptfilter": "yarn run tsx bin/peptfilter.ts",
"uniprot": "yarn run tsx bin/uniprot.ts"
},
"dependencies": {
Expand Down
99 changes: 99 additions & 0 deletions tests/commands/peptfilter.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import { Peptfilter } from '../../lib/commands/peptfilter';
import { jest } from '@jest/globals';
import * as mock from 'mock-stdin';

// Chunks written to stdout / stderr while a test runs; both are emptied before every test.
let output: string[];
let error: string[];

// Replace process.stdout.write with a recorder that stores each chunk in `output`.
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const writeSpy = jest.spyOn(process.stdout, "write").mockImplementation((chunk: unknown) => {
  output.push(chunk as string);
  return true;
});

// Same for process.stderr.write, recording into `error`.
const errorSpy = jest.spyOn(process.stderr, "write").mockImplementation((chunk: unknown) => {
  error.push(chunk as string);
  return true;
});

beforeEach(() => {
  output = [];
  error = [];
});

test('test length filter', async () => {
  // [minLen, maxlen, expected] for the 5-residue peptide 'AALER'
  const cases: [number, number, boolean][] = [
    // min length
    [4, 10, true],
    [5, 10, true],
    [6, 10, false],
    // max length
    [1, 4, false],
    [1, 5, true],
    [1, 6, true],
  ];

  for (const [minLen, maxlen, expected] of cases) {
    expect(Peptfilter.checkLength('AALER', minLen, maxlen)).toBe(expected);
  }
});

test('test lacks filter', async () => {
  // [amino acids that must be absent, expected] for the peptide 'AALER'
  const cases: [string, boolean][] = [
    ['', true],
    ['BCD', true],
    ['A', false],
    ['AE', false],
  ];

  for (const [forbidden, expected] of cases) {
    expect(Peptfilter.checkLacks('AALER', forbidden.split(""))).toBe(expected);
  }
});

test('test contains filter', async () => {
  // [amino acids that must be present, expected] for the peptide 'AALER'
  const cases: [string, boolean][] = [
    ['', true],
    ['A', true],
    ['AE', true],
    ['BCD', false],
    ['AB', false],
  ];

  for (const [required, expected] of cases) {
    expect(Peptfilter.checkContains('AALER', required.split(""))).toBe(expected);
  }
});

test('test default filter from stdin', async () => {
  const stdin = mock.stdin();

  const command = new Peptfilter();
  const run = command.run();

  stdin.send("AAAA\n");  // 4 residues: shorter than the default --minlen of 5, must be dropped
  stdin.send("AAAAA\n"); // 5 residues: must be kept
  stdin.end();

  await run;

  expect(errorSpy).toHaveBeenCalledTimes(0);
  expect(output.join("").trimEnd().split("\n").length).toBe(1);
  // The line count above is also 1 when nothing was written at all
  // ("".split("\n") yields one element), so pin the exact output as well.
  expect(output.join("")).toBe("AAAAA\n");
});

test('test if it passes fasta from stdin', async () => {
  const stdin = mock.stdin();
  const finished = new Peptfilter().run();

  // A FASTA header followed by a peptide that is too short for the default filter.
  for (const chunk of [">AA\n", "AAA\n"]) {
    stdin.send(chunk);
  }
  stdin.end();

  await finished;

  const written = output.join("");
  expect(errorSpy).toHaveBeenCalledTimes(0);
  expect(written.trimEnd().split("\n").length).toBe(1);
  expect(output[0]).toBe(">AA\n");
});

test('test complex example from stdin', async () => {
  const stdin = mock.stdin();

  const args = ["--minlen", "4", "--maxlen", "10", "--lacks", "B", "--contains", "A"];
  const finished = new Peptfilter({ args }).run();

  // Only the last peptide satisfies all four criteria at once.
  const peptides = ["A", "AAAAAAAAAAA", "AAAAB", "BBBBB", "CCCCC", "CCCCCA"];
  for (const peptide of peptides) {
    stdin.send(`${peptide}\n`);
  }
  stdin.end();

  await finished;

  const written = output.join("");
  expect(errorSpy).toHaveBeenCalledTimes(0);
  expect(written.trimEnd().split("\n").length).toBe(1);
  expect(output[0]).toBe("CCCCCA\n");
});
1 change: 1 addition & 0 deletions tests/commands/uniprot.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Uniprot } from '../../lib/commands/uniprot';
import { jest } from '@jest/globals';
import * as mock from 'mock-stdin';

let output: string[];
Expand Down

0 comments on commit b071301

Please sign in to comment.