Skip to content

Commit

Permalink
chore: try testing tries
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed Apr 21, 2024
1 parent f0b64a1 commit 08236ac
Show file tree
Hide file tree
Showing 10 changed files with 78,051 additions and 63 deletions.
77,766 changes: 77,766 additions & 0 deletions ngram/1-gram.tsv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
"as-table": "^1.0.55",
"chalk": "^5.3.0",
"commander": "^12.0.0",
"cspell-trie-lib": "^8.7.0",
"globby": "^14.0.1",
"lorem-ipsum": "^2.0.8",
"ora": "^8.0.1"
Expand Down
6 changes: 3 additions & 3 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

84 changes: 50 additions & 34 deletions src/app.mts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import './perf-suites/measureAnonymous.mjs';
import './perf-suites/measureMap.mjs';
import './perf-suites/measureSearch.mjs';
import './perf-suites/measureAnonymous.perf.mjs';
import './perf-suites/measureMap.perf.mjs';
import './perf-suites/measureSearch.perf.mjs';
import './perf-suites/trie.perf.mjs';

import { fileURLToPath } from 'node:url';

Expand All @@ -15,57 +16,39 @@ const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Options produced by the command line parser and handed to the action handler.
 * Every field is optional; the consumers supply their own fallbacks.
 */
interface AppOptions {
    /** `-a, --all` — run all tests. */
    all?: boolean;
    /** `--repeat <count>` — repeat the tests. */
    repeat?: number;
    /** `-t, --timeout <timeout>` — timeout for each test. */
    timeout?: number;
}

export async function app(program = defaultCommand): Promise<Command> {
const suites = getActiveSuites();
const setOfSuiteNames = new Set(suites.map((suite) => suite.name));
const suitesNames = [...setOfSuiteNames, 'all'];

const argument = new Argument('[test-suite...]', 'list of test suites to run');
argument.choices(suitesNames);
argument.variadic = true;

program
.name('perf runner')
.addArgument(argument)
.description('Run performance tests.')
.option('-a, --all', 'run all tests', false)
.option('--repeat <count>', 'repeat the tests', (v) => Number(v), 1)
.option('-t, --timeout <timeout>', 'timeout for each test', (v) => Number(v), 1000)
.action(async (suiteNamesToRun: string[], options: AppOptions) => {
// console.log('Options: %o', optionsCli);
const timeout = options.timeout || 1000;
const suitesRun = new Set<PerfSuite>();

async function _runSuite(suites: PerfSuite[]) {
for (const suite of suites) {
if (suitesRun.has(suite)) continue;
suitesRun.add(suite);
console.log(chalk.green(`Running Perf Suite: ${suite.name}`));
await suite.setTimeout(timeout).runTests();
}
}
const suites = getActiveSuites();

async function runSuite(name: string) {
if (name === 'all') {
await _runSuite(suites);
return;
}
const matching = suites.filter((suite) => suite.name === name);
if (!matching.length) {
console.log(chalk.red(`Unknown test method: ${name}`));
return;
}
await _runSuite(matching);
}
let numSuitesRun = 0;
let showRepeatMsg = false;

for (const name of suiteNamesToRun) {
await runSuite(name);
for (let repeat = options.repeat || 1; repeat > 0; repeat--) {
if (showRepeatMsg) {
console.log(chalk.yellow(`Repeating tests: ${repeat} more time${repeat > 1 ? 's' : ''}.`));
}
numSuitesRun = await runTestSuites(suites, suiteNamesToRun, options);
if (!numSuitesRun) break;
showRepeatMsg = true;
}

if (!suitesRun.size) {
if (!numSuitesRun) {
console.log(chalk.red('No suites to run.'));
console.log(chalk.yellow('Available suites:'));
const width = process.stdout.columns || 80;
Expand All @@ -87,6 +70,39 @@ export async function app(program = defaultCommand): Promise<Command> {
return program;
}

/**
 * Run the selected performance suites once, in order, skipping suites that were already run.
 *
 * @param suites - every registered suite that may be selected.
 * @param suiteNamesToRun - suite selectors from the command line. The literal `all` selects every
 *   suite; any other value selects the suites whose name starts with it (case-insensitive).
 * @param options - command line options. `timeout` is passed to each suite before it runs and
 *   `all` selects every suite even when no selector was given.
 * @returns the number of distinct suites that were run.
 */
async function runTestSuites(suites: PerfSuite[], suiteNamesToRun: string[], options: AppOptions): Promise<number> {
    // `||` rather than `??` is deliberate: it also replaces `0` and `NaN` (from a non-numeric
    // `--timeout` value) with the default.
    const timeout = options.timeout || 1000;
    // Tracks what has run so overlapping selectors never run the same suite twice.
    const suitesRun = new Set<PerfSuite>();

    /** Run each suite in `suites` that has not run yet. */
    async function _runSuite(suites: PerfSuite[]) {
        for (const suite of suites) {
            if (suitesRun.has(suite)) continue;
            suitesRun.add(suite);
            console.log(chalk.green(`Running Perf Suite: ${suite.name}`));
            await suite.setTimeout(timeout).runTests();
        }
    }

    /** Resolve a single selector to its matching suites and run them. */
    async function runSuite(name: string) {
        if (name === 'all') {
            await _runSuite(suites);
            return;
        }
        const matching = suites.filter((suite) => suite.name.toLowerCase().startsWith(name.toLowerCase()));
        if (!matching.length) {
            console.log(chalk.red(`Unknown test method: ${name}`));
            return;
        }
        await _runSuite(matching);
    }

    // Honor the `--all` flag; without this the flag was accepted but had no effect.
    if (options.all) {
        await _runSuite(suites);
    }

    for (const name of suiteNamesToRun) {
        await runSuite(name);
    }

    return suitesRun.size;
}

export async function run(argv?: string[], program?: Command): Promise<void> {
const prog = await app(program);
await prog.parseAsync(argv);
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { loremIpsum } from 'lorem-ipsum';

import { suite } from '../perfSuite.mjs';
import { SimpleTrie } from '../lib/SimpleTrie.mjs';
import { Trie } from '../lib/Trie.mjs';
import { suite } from '../perfSuite.mjs';

const numWords = 10000;

Expand All @@ -22,7 +22,7 @@ function getData() {
}

for (const numTerms of termNumber) {
suite(`search`, `Search Dictionary, Size of dictionary: ${numTerms}`, async (test) => {
suite(`search-${numTerms}`, `Search Dictionary, Size of dictionary: ${numTerms}`, async (test) => {
const { wordsToSearch, knownWords } = getData();

// test('lorem-ipsum words', () => {
Expand Down
File renamed without changes.
158 changes: 158 additions & 0 deletions src/perf-suites/trie.perf.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
import assert from 'node:assert';
import fs from 'node:fs/promises';

import { buildITrieFromWords, buildTrie, buildTrieFast, TrieBuilder } from 'cspell-trie-lib';

import { SimpleTrie } from '../lib/SimpleTrie.mjs';
import { Trie } from '../lib/Trie.mjs';
import { suite } from '../perfSuite.mjs';

// Location of the 1-gram word list (`ngram/1-gram.tsv`), resolved relative to this module's URL.
const wordsUrl = new URL('../../ngram/1-gram.tsv', import.meta.url);

// Module-level cache of the loaded word list; stays `undefined` until `loadWords()` fills it.
let words: string[] | undefined = undefined;

// Perf suite: builds each trie implementation from the loaded word list and checks that a
// known word ('hello') can be found in the result.
suite('trie-insert', 'Insert words into a trie', async (test, { beforeAll }) => {
    const wordList = await loadWords();
    const sortedWordList = [...wordList].sort();

    /** Insert `source` into a fresh `TrieBuilder` and return `build(buildArg)`. */
    const buildWithTrieBuilder = (source: string[], buildArg: boolean) => {
        const builder = new TrieBuilder();
        builder.insert(source);
        return builder.build(buildArg);
    };

    beforeAll(() => {
        // console.log('words: %o', wordList);

        // warm up the words.
        for (const word of wordList) void word;
    });

    test('SimpleTrie', () => {
        const simpleTrie = new SimpleTrie();
        simpleTrie.addWords(wordList);
        assert(simpleTrie.has('hello'));
        return simpleTrie;
    });

    test('Trie', () => {
        const localTrie = new Trie();
        localTrie.addWords(wordList);
        assert(localTrie.has('hello'));
        return localTrie;
    });

    test('buildITrieFromWords', () => {
        const iTrie = buildITrieFromWords(wordList);
        assert(iTrie.has('hello'));
        return iTrie;
    });

    test('buildTrie', () => {
        const built = buildTrie(wordList);
        assert(built.has('hello'));
        return built;
    });

    test('buildTrieFast', () => {
        const built = buildTrieFast(wordList);
        assert(built.has('hello'));
        return built;
    });

    test('TrieBuilder.build(false)', () => {
        const built = buildWithTrieBuilder(wordList, false);
        assert(built.has('hello'));
        return built;
    });

    test('TrieBuilder.build(true)', () => {
        const built = buildWithTrieBuilder(wordList, true);
        assert(built.has('hello'));
        return built;
    });

    test('TrieBuilder.build(false) sorted', () => {
        const built = buildWithTrieBuilder(sortedWordList, false);
        assert(built.has('hello'));
        return built;
    });
});

// How many words are sampled from the word list for the lookup measurements.
const numberOfSearchWords = 1000;

// Perf suite: each trie is built in a `prepare` step; the test callback looks up every
// sampled word in the prepared trie.
suite('trie-search', 'Search for words in a trie', async (_test, { prepare }) => {
    const loaded = await loadWords();
    const dictionary = [...loaded].sort();
    const searchWords = sampleWords(dictionary, numberOfSearchWords);

    /** Create a prepare callback that builds via `TrieBuilder` and `build(buildArg)`. */
    const viaTrieBuilder = (buildArg: boolean) => () => {
        const builder = new TrieBuilder();
        builder.insert(dictionary);
        return builder.build(buildArg);
    };

    prepare(() => new SimpleTrie().addWords(dictionary)).test('SimpleTrie', (trie) =>
        searchWords.map((word) => trie.has(word)),
    );

    prepare(() => new Trie().addWords(dictionary)).test('Trie', (trie) =>
        searchWords.map((word) => trie.has(word)),
    );

    prepare(() => buildITrieFromWords(dictionary)).test('buildITrieFromWords', (trie) =>
        searchWords.map((word) => trie.has(word)),
    );

    prepare(() => buildTrie(dictionary)).test('buildTrie', (trie) =>
        searchWords.map((word) => trie.has(word)),
    );

    prepare(() => buildTrieFast(dictionary)).test('buildTrieFast', (trie) =>
        searchWords.map((word) => trie.has(word)),
    );

    prepare(viaTrieBuilder(false)).test('TrieBuilder.build(false)', (trie) =>
        searchWords.map((word) => trie.has(word)),
    );

    prepare(viaTrieBuilder(true)).test('TrieBuilder.build(true)', (trie) =>
        searchWords.map((word) => trie.has(word)),
    );
});

/**
 * Load the word list from `wordsUrl`, ordered by descending frequency.
 *
 * The file is read as tab-separated `word<TAB>frequency` lines; blank lines and lines
 * starting with `#` are ignored. The result is frozen and cached in the module-level
 * `words` variable, so later calls return the same array without reading the file again.
 */
async function loadWords() {
    if (words) return words;

    const content = await fs.readFile(wordsUrl, 'utf-8');

    const entries: (readonly [string, number])[] = [];
    for (const rawLine of content.split('\n')) {
        const line = rawLine.trim();
        // Skip blank lines and `#` comment lines.
        if (!line || line.startsWith('#')) continue;
        const [word, freq] = line.split('\t');
        entries.push([word, Number(freq)]);
    }
    // Highest frequency first.
    entries.sort((left, right) => right[1] - left[1]);

    // Disabled helper that rewrote the data file with shortened frequency values:
    // const lines = entries.map(([word, freq]) => {
    //     const n1 = freq.toPrecision(4);
    //     const n2 = freq.toString(10);
    //     return `${word}\t${n1.length < n2.length ? n1 : n2}`;
    // });

    // await fs.writeFile(wordsUrl, lines.join('\n') + '\n');

    words = entries.map((entry) => entry[0]);

    Object.freeze(words);

    return words;
}

/**
 * Pick `n` words at random, with replacement, from `words`.
 *
 * @param words - the pool to sample from; it is not modified.
 * @param n - the number of samples to draw.
 * @param random - source of numbers in `[0, 1)`; defaults to `Math.random`. Pass a seeded
 *   generator to get a reproducible sample.
 * @returns an array of `n` sampled words, or an empty array when `words` is empty.
 */
function sampleWords(words: readonly string[], n: number, random: () => number = Math.random): string[] {
    // Nothing to sample from: indexing an empty array would fill the result with `undefined`.
    if (!words.length) return [];

    const samples: string[] = [];
    for (let i = 0; i < n; i++) {
        const r = Math.floor(random() * words.length);
        samples.push(words[r]);
    }
    return samples;
}
Loading

0 comments on commit 08236ac

Please sign in to comment.