-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #175 from unipept/next-peptfilter
Port peptfilter to typescript
- Loading branch information
Showing
7 changed files
with
181 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/usr/bin/env node | ||
|
||
import { Peptfilter } from '../lib/commands/peptfilter.js'; | ||
|
||
// Entry point of the peptfilter executable: build the command and start it.
new Peptfilter().run();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import { createInterface } from 'node:readline'; | ||
import { BaseCommand } from './base_command.js'; | ||
|
||
export class Peptfilter extends BaseCommand {

  /** Text registered as the description of the command. */
  readonly description = `The peptfilter command filters a list of peptides according to specific criteria. The command expects a list of peptides that are passed to standard input.
The input should have one peptide per line. FASTA headers are preserved in the output, so that peptides remain bundled.`;

  /**
   * Registers the summary, the description and the four filter options
   * (`--minlen`, `--maxlen`, `--lacks`, `--contains`) on the command's program.
   *
   * @param options Forwarded unchanged to the `BaseCommand` constructor.
   */
  constructor(options?: { exitOverride?: boolean, suppressOutput?: boolean, args?: string[] }) {
    super(options);

    // Converters applied to the raw option values.
    const toInt = (raw: string): number => parseInt(raw, 10);
    const toResidues = (raw: string): string[] => raw.split("");

    this.program
      .summary("Filter peptides based on specific criteria.")
      .description(this.description)
      .option("--minlen <length>", "only retain peptides having at least this many amino acids", toInt, 5)
      .option("--maxlen <length>", "only retain peptides having at most this many amino acids", toInt, 50)
      .option("-l, --lacks <amino acids>", "only retain peptides that lack all of the specified amino acids", toResidues)
      .option("-c, --contains <amino acids>", "only retain peptides that contain all of the specified amino acids", toResidues);
  }

  /**
   * Reads standard input line by line and writes every retained line to standard output.
   * Lines starting with ">" (FASTA headers) are always retained; any other line is retained
   * only when it passes the length, lacks and contains checks.
   *
   * Performance note: the author measured 4 seconds on swissprot for this async-iterator
   * version versus 2.5 seconds for a variant based on line events. The async iterator was
   * kept because it is more readable and more in line with the other commands.
   */
  async run() {
    this.parseArguments();

    const opts = this.program.opts();
    const minLen = opts.minlen;
    const maxlen = opts.maxlen;
    const lacks = opts.lacks ?? [];
    const contains = opts.contains ?? [];

    // Retained lines are collected and written in batches; buffering output makes a big
    // difference in performance.
    let buffered: string[] = [];
    const flush = (): void => {
      buffered.push(""); // trailing empty entry yields the final newline without an extra string copy
      process.stdout.write(buffered.join("\n"));
      buffered = [];
    };

    let seen = 0;
    const lines = createInterface({ input: process.stdin });
    for await (const line of lines) {
      seen += 1;

      const retained = line.startsWith(">") // FASTA headers pass through unfiltered
        || (Peptfilter.checkLength(line, minLen, maxlen)
          && Peptfilter.checkLacks(line, lacks)
          && Peptfilter.checkContains(line, contains));
      if (retained) {
        buffered.push(line);
      }

      // Write out the batch after every 1000 input lines.
      if (seen % 1000 === 0) {
        flush();
      }
    }

    flush();
  }

  /**
   * @returns true when the number of characters in `line` lies in the inclusive range [minLen, maxlen].
   */
  static checkLength(line: string, minLen: number, maxlen: number): boolean {
    const size = line.length;
    return minLen <= size && size <= maxlen;
  }

  /**
   * @returns true when none of the entries of `lacks` occurs in `line` (always true for an empty list).
   */
  static checkLacks(line: string, lacks: string[]): boolean {
    const hasForbidden = lacks.some((aa: string) => line.includes(aa));
    return !hasForbidden;
  }

  /**
   * @returns true when every entry of `contains` occurs in `line` (always true for an empty list).
   */
  static checkContains(line: string, contains: string[]): boolean {
    const missesRequired = contains.some((aa: string) => !line.includes(aa));
    return !missesRequired;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import { Peptfilter } from '../../lib/commands/peptfilter'; | ||
import { jest } from '@jest/globals'; | ||
import * as mock from 'mock-stdin'; | ||
|
||
// Chunks written to stdout / stderr while the current test runs.
let output: string[];
let error: string[];

// Builds a replacement for a stream's write() that appends each chunk to the buffer returned
// by `sink`. The buffer is looked up on every call because beforeEach reassigns it.
const collectInto = (sink: () => string[]) => (data: unknown): boolean => {
  sink().push(data as string);
  return true;
};

// eslint-disable-next-line @typescript-eslint/no-unused-vars
const writeSpy = jest.spyOn(process.stdout, "write").mockImplementation(collectInto(() => output));
const errorSpy = jest.spyOn(process.stderr, "write").mockImplementation(collectInto(() => error));

// Every test starts with empty capture buffers.
beforeEach(() => {
  output = [];
  error = [];
});
|
||
test('test length filter', () => {
  // 'AALER' has 5 residues; each bound is probed just below, at, and just above 5.
  const fits = (min: number, max: number): boolean => Peptfilter.checkLength('AALER', min, max);

  // lower bound
  expect(fits(4, 10)).toBe(true);
  expect(fits(5, 10)).toBe(true);
  expect(fits(6, 10)).toBe(false);

  // upper bound
  expect(fits(1, 4)).toBe(false);
  expect(fits(1, 5)).toBe(true);
  expect(fits(1, 6)).toBe(true);
});
|
||
test('test lacks filter', () => {
  // Splits the given letters into single amino acids and checks them against 'AALER'.
  const lacksAll = (aminoAcids: string): boolean => Peptfilter.checkLacks('AALER', aminoAcids.split(""));

  expect(lacksAll('')).toBe(true);    // nothing is forbidden
  expect(lacksAll('BCD')).toBe(true); // none of these occur
  expect(lacksAll('A')).toBe(false);
  expect(lacksAll('AE')).toBe(false);
});
|
||
test('test contains filter', () => {
  // Splits the given letters into single amino acids and checks them against 'AALER'.
  const containsAll = (aminoAcids: string): boolean => Peptfilter.checkContains('AALER', aminoAcids.split(""));

  expect(containsAll('')).toBe(true);    // nothing is required
  expect(containsAll('A')).toBe(true);
  expect(containsAll('AE')).toBe(true);
  expect(containsAll('BCD')).toBe(false);
  expect(containsAll('AB')).toBe(false); // one required amino acid missing is enough to fail
});
|
||
test('test default filter from stdin', async () => {
  const stdin = mock.stdin();

  // No arguments: the default --minlen of 5 and --maxlen of 50 apply.
  const command = new Peptfilter();
  const run = command.run();

  stdin.send("AAAA\n");  // 4 residues: below the default minimum, must be dropped
  stdin.send("AAAAA\n"); // 5 residues: exactly the default minimum, must be kept
  stdin.end();

  await run;

  expect(errorSpy).toHaveBeenCalledTimes(0);
  expect(output.join("").trimEnd().split("\n").length).toBe(1);
  // The line count alone cannot detect empty output ("".split("\n") also has length 1),
  // so pin the retained peptide explicitly.
  expect(output[0]).toBe("AAAAA\n");
});
|
||
test('test if it passes fasta from stdin', async () => {
  const stdin = mock.stdin();
  const finished = new Peptfilter().run();

  // A FASTA header followed by a peptide that is shorter than the default minimum length.
  for (const chunk of [">AA\n", "AAA\n"]) {
    stdin.send(chunk);
  }
  stdin.end();

  await finished;

  const printedLines = output.join("").trimEnd().split("\n");
  expect(errorSpy).toHaveBeenCalledTimes(0);
  expect(printedLines.length).toBe(1);
  expect(output[0]).toBe(">AA\n"); // only the header survives
});
|
||
test('test complex example from stdin', async () => {
  const stdin = mock.stdin();

  const args = ["--minlen", "4", "--maxlen", "10", "--lacks", "B", "--contains", "A"];
  const finished = new Peptfilter({ args }).run();

  const peptides = [
    "A",           // shorter than --minlen
    "AAAAAAAAAAA", // longer than --maxlen
    "AAAAB",       // contains the forbidden B
    "BBBBB",       // contains B and has no A
    "CCCCC",       // has no A
    "CCCCCA",      // satisfies every criterion
  ];
  for (const peptide of peptides) {
    stdin.send(`${peptide}\n`);
  }
  stdin.end();

  await finished;

  const printedLines = output.join("").trimEnd().split("\n");
  expect(errorSpy).toHaveBeenCalledTimes(0);
  expect(printedLines.length).toBe(1);
  expect(output[0]).toBe("CCCCCA\n");
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters