diff --git a/package-lock.json b/package-lock.json index edde512..7d81289 100644 --- a/package-lock.json +++ b/package-lock.json @@ -954,6 +954,12 @@ "undici-types": "~5.26.4" } }, + "node_modules/@types/pako": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@types/pako/-/pako-2.0.3.tgz", + "integrity": "sha512-bq0hMV9opAcrmE0Byyo0fY3Ew4tgOevJmQ9grUhpXQhYfyLJ1Kqg3P33JT5fdbT2AjeAjR51zqqVjAL/HMkx7Q==", + "dev": true + }, "node_modules/@types/semver": { "version": "7.5.8", "resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.5.8.tgz", @@ -1383,8 +1389,7 @@ "node_modules/argparse": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" }, "node_modules/array-union": { "version": "2.1.0", @@ -1420,6 +1425,10 @@ "resolved": "packages/core", "link": true }, + "node_modules/boox-cli": { + "resolved": "packages/cli", + "link": true + }, "node_modules/brace-expansion": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", @@ -1505,6 +1514,31 @@ "node": "*" } }, + "node_modules/cli-cursor": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-4.0.0.tgz", + "integrity": "sha512-VGtlMu3x/4DOtIUwEkRezxUZ2lBacNJCHash0N0WeZDBS+7Ux1dm3XWAgWYxLJFMMdOeXMHXorshEFhbMSGelg==", + "dependencies": { + "restore-cursor": "^4.0.0" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/cli-spinners": { + "version": "2.9.2", + "resolved": "https://registry.npmjs.org/cli-spinners/-/cli-spinners-2.9.2.tgz", + "integrity": 
"sha512-ywqV+5MmyL4E7ybXgKys4DugZbX0FC6LnwrhjuykIjnK9k8OQacQ7axGKnjDXWNhns0xot3bZI5h55H8yo9cJg==", + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", @@ -1849,6 +1883,11 @@ "node": "*" } }, + "node_modules/emoji-regex": { + "version": "10.3.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.3.0.tgz", + "integrity": "sha512-QpLs9D9v9kArv4lfDEgg1X/gN5XLnf/A6l9cs8SPZLRZR3ZkY9+kwIQTxm+fsSej5UMYGE8fdoaZVIBlqG0XTw==" + }, "node_modules/esbuild": { "version": "0.20.2", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.20.2.tgz", @@ -2255,6 +2294,17 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, + "node_modules/get-east-asian-width": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/get-east-asian-width/-/get-east-asian-width-1.2.0.tgz", + "integrity": "sha512-2nk+7SIVb14QrgXFHcm84tD4bKQz0RxPuMT8Ag5KPOq7J5fEmAg0UbXdTOSHqNuHSU28k55qnceesxXRZGzKWA==", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/get-func-name": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/get-func-name/-/get-func-name-2.0.2.tgz", @@ -2467,6 +2517,17 @@ "node": ">=0.10.0" } }, + "node_modules/is-interactive": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/is-interactive/-/is-interactive-2.0.0.tgz", + "integrity": "sha512-qP1vozQRI+BMOPcjFzrjXuQvdak2pHNUMZoeG2eRbiSqyvbEf/wQtEOTOX1guk6E3t36RkaqiSt8A/6YElNxLQ==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/is-number": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", @@ -2497,6 +2558,17 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + 
"node_modules/is-unicode-supported": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-2.0.0.tgz", + "integrity": "sha512-FRdAyx5lusK1iHG0TWpVtk9+1i+GjrzRffhDg4ovQ7mcidMQ6mj+MhKPmvh7Xwyv5gIS06ns49CA7Sqg7lC22Q==", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", @@ -2563,7 +2635,6 @@ "version": "4.1.0", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", - "dev": true, "dependencies": { "argparse": "^2.0.1" }, @@ -2611,6 +2682,14 @@ "node": ">= 0.8.0" } }, + "node_modules/loadee": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/loadee/-/loadee-3.1.1.tgz", + "integrity": "sha512-6SjPpcZMXUqJVQNh3LSGhQ2909lafZzvdKguq0qkGUnw8QMO08eUycs6KcK3Z9s6sPyEMqaI5cf8j5KWD7xRxw==", + "dependencies": { + "js-yaml": "^4.1.0" + } + }, "node_modules/local-pkg": { "version": "0.5.0", "resolved": "https://registry.npmjs.org/local-pkg/-/local-pkg-0.5.0.tgz", @@ -2648,6 +2727,43 @@ "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", "dev": true }, + "node_modules/log-symbols": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-6.0.0.tgz", + "integrity": "sha512-i24m8rpwhmPIS4zscNzK6MSEhk0DUWa/8iYQWxhffV8jkI4Phvs3F+quL5xvS0gdQR0FyTCMMH33Y78dDTzzIw==", + "dependencies": { + "chalk": "^5.3.0", + "is-unicode-supported": "^1.3.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/log-symbols/node_modules/chalk": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.3.0.tgz", + "integrity": 
"sha512-dLitG79d+GV1Nb/VYcCDFivJeK1hiukt9QjRNVOsUtTy1rR1YJsmpGGTZ3qJos+uw7WmWF4wUwBd9jxjocFC2w==", + "engines": { + "node": "^12.17.0 || ^14.13 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/log-symbols/node_modules/is-unicode-supported": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-1.3.0.tgz", + "integrity": "sha512-43r2mRvz+8JRIKnWJ+3j8JtjRKZ6GmjzfaE/qiBJnikNnYv/6bagRJ1kUhNk8R5EX/GkobD+r+sfxCPJsiKBLQ==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/loupe": { "version": "2.3.7", "resolved": "https://registry.npmjs.org/loupe/-/loupe-2.3.7.tgz", @@ -2866,6 +2982,64 @@ "node": ">= 0.8.0" } }, + "node_modules/ora": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/ora/-/ora-8.0.1.tgz", + "integrity": "sha512-ANIvzobt1rls2BDny5fWZ3ZVKyD6nscLvfFRpQgfWsythlcsVUC9kL0zq6j2Z5z9wwp1kd7wpsD/T9qNPVLCaQ==", + "dependencies": { + "chalk": "^5.3.0", + "cli-cursor": "^4.0.0", + "cli-spinners": "^2.9.2", + "is-interactive": "^2.0.0", + "is-unicode-supported": "^2.0.0", + "log-symbols": "^6.0.0", + "stdin-discarder": "^0.2.1", + "string-width": "^7.0.0", + "strip-ansi": "^7.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/ora/node_modules/ansi-regex": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", + "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/ora/node_modules/chalk": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.3.0.tgz", + "integrity": 
"sha512-dLitG79d+GV1Nb/VYcCDFivJeK1hiukt9QjRNVOsUtTy1rR1YJsmpGGTZ3qJos+uw7WmWF4wUwBd9jxjocFC2w==", + "engines": { + "node": "^12.17.0 || ^14.13 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/ora/node_modules/strip-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, "node_modules/p-limit": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", @@ -2896,6 +3070,11 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/pako": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/pako/-/pako-2.1.0.tgz", + "integrity": "sha512-w+eufiZ1WuJYgPXbV/PO3NCMEc3xqylkKHzp8bxp1uW4qaSNQUkwmLLEc3kKsfz8lpV1F8Ht3U1Cm+9Srog2ug==" + }, "node_modules/parent-module": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", @@ -3095,6 +3274,14 @@ } ] }, + "node_modules/rcfy": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/rcfy/-/rcfy-2.1.0.tgz", + "integrity": "sha512-L3lD/G2hmHh8pHD3WP4Q4bgaHQW5WRh/RKThWLdJDla0vTk6vDv586i1AFy+prQ63vvV8Txf9A2GKdNEb69W2A==", + "dependencies": { + "loadee": "^3.1.1" + } + }, "node_modules/react-is": { "version": "18.3.1", "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", @@ -3155,6 +3342,48 @@ "node": ">= 6" } }, + "node_modules/restore-cursor": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-4.0.0.tgz", + "integrity": "sha512-I9fPXU9geO9bHOt9pHHOhOkYerIMsmVaWB0rA2AI9ERh/+x/i7MV5HKBNrg+ljO5eoPVgCcnFuRjJ9uH6I/3eg==", + "dependencies": { + "onetime": "^5.1.0", + 
"signal-exit": "^3.0.2" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/restore-cursor/node_modules/mimic-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", + "engines": { + "node": ">=6" + } + }, + "node_modules/restore-cursor/node_modules/onetime": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", + "integrity": "sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==", + "dependencies": { + "mimic-fn": "^2.1.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/restore-cursor/node_modules/signal-exit": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", + "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==" + }, "node_modules/reusify": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", @@ -3334,6 +3563,58 @@ "integrity": "sha512-JPbdCEQLj1w5GilpiHAx3qJvFndqybBysA3qUOnznweH4QbNYUsW/ea8QzSrnh0vNsezMMw5bcVool8lM0gwzg==", "dev": true }, + "node_modules/stdin-discarder": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/stdin-discarder/-/stdin-discarder-0.2.2.tgz", + "integrity": "sha512-UhDfHmA92YAlNnCfhmq0VeNL5bDbiZGg7sZ2IvPsXubGkiNa9EC+tUTsjBRsYUAz87btI6/1wf4XoVvQ3uRnmQ==", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/string-width": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-7.1.0.tgz", + "integrity": 
"sha512-SEIJCWiX7Kg4c129n48aDRwLbFb2LJmXXFrWBG4NGaRtMQ3myKPKbwrD1BKqQn74oCoNMBVrfDEr5M9YxCsrkw==", + "dependencies": { + "emoji-regex": "^10.3.0", + "get-east-asian-width": "^1.0.0", + "strip-ansi": "^7.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/string-width/node_modules/ansi-regex": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", + "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/string-width/node_modules/strip-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, "node_modules/strip-ansi": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", @@ -3788,6 +4069,30 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "packages/cli": { + "version": "0.0.0-development", + "license": "MIT", + "dependencies": { + "commander": "^12.0.0", + "ora": "^8.0.1", + "pako": "^2.1.0", + "rcfy": "^2.1.0" + }, + "bin": { + "boox-cli": "dist/index.js" + }, + "devDependencies": { + "@types/pako": "^2.0.3" + } + }, + "packages/cli/node_modules/commander": { + "version": "12.0.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-12.0.0.tgz", + "integrity": "sha512-MwVNWlYjDTtOjX5PiD7o5pK0UrFU/OYgcJfjjK4RaHZETNtjJqrZa9Y9ds88+A+f+d5lv+561eZ+yCKoS3gbAA==", + "engines": { + "node": ">=18" + } + }, "packages/core": { "name": "boox", "version": 
"0.0.0-development", diff --git a/package.json b/package.json index 2a8ce3f..f0c1c61 100644 --- a/package.json +++ b/package.json @@ -41,11 +41,11 @@ "prebuild:doc": "npm run train -w boox-site", "build:doc": "npm run build -w boox-site", "build:examples": "npm run build -w examples-vanilla -w examples-react -w examples-vue -w examples-svelte -w examples-nextjs", - "build": "npm run build -w boox", - "types": "npm run types -w boox", - "lint": "npm run lint -w boox", - "test": "npm test -w boox", - "coverage": "npm run coverage -w boox", + "build": "npm run build -w boox -w boox-cli", + "types": "npm run types -w boox -w boox-cli", + "lint": "npm run lint -w boox -w boox-cli", + "test": "npm test -w boox -w boox-cli", + "coverage": "npm run coverage -w boox -w boox-cli", "format": "npm run format --workspaces --if-present" }, "devDependencies": { diff --git a/packages/cli/changelog.md b/packages/cli/changelog.md new file mode 100644 index 0000000..e69de29 diff --git a/packages/cli/package.json b/packages/cli/package.json new file mode 100644 index 0000000..2a5d9db --- /dev/null +++ b/packages/cli/package.json @@ -0,0 +1,48 @@ +{ + "name": "boox-cli", + "description": "A command-line interface (CLI) for training and searching Boox datasets.", + "version": "0.0.0-development", + "publishConfig": { + "access": "public" + }, + "author": "Beni Arisandi (https://stilearning.com)", + "repository": "https://github.com/bent10/boox", + "homepage": "https://stilearning.com/boox", + "license": "MIT", + "keywords": [ + "boox", + "train", + "trainer", + "document", + "index", + "indexing", + "nlp" + ], + "type": "module", + "bin": "dist/index.js", + "files": [ + "dist", + "changelog.md", + "readme.md" + ], + "scripts": { + "start": "vite", + "dev": "vite build --watch", + "build": "vite build && npm run chmod", + "test": "vitest", + "bench": "vitest bench", + "coverage": "vitest run --coverage", + "lint": "tsc --noEmit && eslint . 
--cache --cache-location ../../node_modules/.eslint", + "format": "prettier . --write --cache-location ../../node_modules/.prettier --ignore-path ../../.prettierignore", + "chmod": "chmod +x dist/index.js" + }, + "dependencies": { + "commander": "^12.0.0", + "ora": "^8.0.1", + "pako": "^2.1.0", + "rcfy": "^2.1.0" + }, + "devDependencies": { + "@types/pako": "^2.0.3" + } +} diff --git a/packages/cli/readme.md b/packages/cli/readme.md new file mode 100644 index 0000000..d2a0542 --- /dev/null +++ b/packages/cli/readme.md @@ -0,0 +1,131 @@ +# Boox CLI + +A command-line interface (CLI) for training and searching [Boox](https://github.com/bent10/boox) datasets. + +## Installation + +Install `boox-cli` globally using npm or yarn: + +```bash +npm install -g boox-cli + +# Or + +yarn global add boox-cli +``` + +## Usage + +### Training + +To train a Boox dataset, use the `train` command: + +```bash +boox-cli train <source> [destination] [options] +``` + +- `<source>`: The path to your dataset file (JSON format). +- `[destination]`: (Optional) The path where the trained data will be saved. Defaults to the directory of the source file. + +Options: + +- `-i, --id <field>`: The field in your dataset objects that uniquely identifies each document (default: `'id'`). +- `-f, --features <fields...>`: The fields to index for search (multiple fields can be specified). +- `-a, --attributes <fields...>`: The fields to include as-is without indexing (multiple fields can be specified). +- `-d, --deflate`: Compress the trained data as `.dat` file (default: `false`). +- `-c, --cwd <path>`: The working directory (default: current directory). +- `-r, --rcname <name>`: The name of the Boox configuration file (default: `'boox'`). + +Example: + +```bash +boox-cli train data/products.json -f title description -a price +``` + +This command will train a Boox dataset from the `data/products.json` file, indexing the `title` and `description` fields for search and including the `price` field as-is. The trained data will be saved as a compressed `.gz` file. 
+ +### Searching + +To search a trained Boox dataset, use the `search` command: + +```bash +boox-cli search <source> <query> [options] +``` + +- `<source>`: The path to the trained dataset file (`.dat` or `.gz`). +- `<query>`: The search query string. + +Options: + +- `-o, --offset <number>`: The offset for pagination (default: `'1'`). +- `-l, --length <number>`: The number of results per page (default: `'10'`). +- `-k, --context <field>`: Display the context of the given field instead of paginated results object. Use `field::maxlength` to limit the context length (default: `160`). +- `-a, --attrs <fields...>`: Fields to display when `--context` is provided. +- `-d, --deflate`: Assume the trained data is deflated as `.dat` file (default: `false`). +- `-c, --cwd <path>`: The working directory (default: current directory). +- `-r, --rcname <name>`: The name of the Boox configuration file (default: `'boox'`). + +Example: + +```bash +boox-cli search data/products-trained.gz "shoes" -o 2 -l 20 +``` + +This command will search the `data/products-trained.gz` dataset for documents containing the word `"shoes"`, starting from the second page and displaying 20 results per page. 
+ +## Using configuration file + +You can create a Boox configuration file in your project's root directory to specify default options for the `boox-cli train` and `boox-cli search` commands: + +- `.booxrc` +- `.booxrc.json` +- `.booxrc.{yaml,yml}` +- `.boox.{mjs,cjs,js}` +- `boox.config.{mjs,cjs,js}` + +Before using the example below, make sure to install the required libraries: + +```bash +npm install -D double-metaphone stemmer stopword marked marked-plaintify +``` + +Here's an example of a Boox configuration file: + +```js +// boox.config.js +import { doubleMetaphone } from 'double-metaphone' +import { Marked } from 'marked' +import markedPlaintify from 'marked-plaintify' +import { stemmer } from 'stemmer' +import { removeStopwords } from 'stopword' + +const marked = new Marked({ gfm: true }).use(markedPlaintify()) +const wordRegexp = /\b\w+\b/g + +/** @type {() => import('boox').BooxOptions} */ +export default function defineBooxConfig() { + return { + id: 'customId', + features: ['title', 'content', 'tags'], + attributes: ['author', 'date'], + modelOptions: { + normalizer(input) { + // Remove Markdown formatting + return marked.parse(input) + }, + tokenizer(input) { + const tokens = Array.from(input.match(wordRegexp) || []) + return removeStopwords(tokens) + }, + stemmer: stemmer, + phonetic: doubleMetaphone + } + } +} +``` + +The `--rcname` flag allows you to customize the name of the configuration file. 
For example, to use a configuration file named `my-appname.config.js`, you would run the following command:
+
+```bash
+boox-cli train src/dataset.json --rcname my-appname
+```
diff --git a/packages/cli/src/api.ts b/packages/cli/src/api.ts
new file mode 100644
index 0000000..71e1cb3
--- /dev/null
+++ b/packages/cli/src/api.ts
@@ -0,0 +1,201 @@
+import { access, constants, mkdir, readFile, writeFile } from 'node:fs/promises'
+import { basename, dirname, extname, join, relative, resolve } from 'node:path'
+// NOTE(review): `batch-me-up` and `boox` are imported below but neither is listed
+// under "dependencies" in packages/cli/package.json; confirm they resolve.
+import generateBatches from 'batch-me-up'
+import Boox, { type BooxOptions, type Dataset, type SearchOptions } from 'boox'
+import { oraPromise } from 'ora'
+import { deflate, gzip, inflate, ungzip } from 'pako'
+import { loadRc } from 'rcfy'
+import { getDataSize, getElapsedTime } from './utils.js'
+
+/** Options accepted by `trainDataset` on top of the Boox options. */
+export interface Options extends BooxOptions {
+  /** Compress with pako `deflate` (`.dat`) instead of `gzip` (`.gz`). */
+  isDeflate?: boolean
+  /** Directory that `src` and `dest` are resolved against. */
+  cwd?: string
+  /** Name passed to `loadRc` to locate the configuration file. */
+  rcname?: string
+}
+
+/** Pagination values; converted to numbers with unary `+` before use. */
+export interface PageOptions {
+  offset?: string
+  length?: string
+}
+
+// --- Constants ---
+const DEFAULT_COMPRESSION_LEVEL = 6
+
+/**
+ * Trains a Boox dataset and saves the trained data.
+ *
+ * The output file is named after the source file with its extension replaced
+ * by `-trained.gz`, or `-trained.dat` when `isDeflate` is set.
+ *
+ * @param src Path to the dataset file.
+ * @param dest Directory for the trained data; falls back to the directory of `src`.
+ * @param options Training options.
+ */
+export async function trainDataset(
+  src: string,
+  dest: string,
+  { rcname = 'boox', cwd, ...options }: Options = {}
+) {
+  const resolvedCwd = cwd ? resolve(cwd) : process.cwd()
+  // Load user config from (e.g. boox.config.js) file, if present
+  const userConfig: Options = await loadRc(rcname, resolvedCwd)
+  // Spread order matters: values from the config file override `options`
+  const {
+    id = 'id',
+    features = ['text'],
+    attributes = [],
+    modelOptions,
+    isDeflate = false
+  } = { ...options, ...userConfig }
+
+  // `resolve` (unlike `join`) leaves an absolute `src`/`dest` untouched
+  const resolvedSrc = relative(process.cwd(), resolve(resolvedCwd, src))
+
+  // `basename(path, ext)` strips only the trailing extension; the previous
+  // `String#replace` misplaced the suffix when `src` had no extension
+  const trainedName = `${basename(src, extname(src))}${
+    isDeflate ? '-trained.dat' : '-trained.gz'
+  }`
+  const trainedFile = join(
+    dest
+      ? relative(process.cwd(), resolve(resolvedCwd, dest))
+      : dirname(resolvedSrc),
+    trainedName
+  )
+
+  // Create Boox instance
+  const boox = new Boox({ id, features, attributes, modelOptions })
+
+  // Read dataset from file
+  const datasets = await oraPromise(
+    async () => JSON.parse(await readFile(resolvedSrc, 'utf8')),
+    {
+      text: 'Reading data...',
+      successText(data) {
+        return `Reading ${getDataSize(data)} data!`
+      }
+    }
+  )
+
+  const batches = await generateBatches(datasets)
+  const progress = {
+    current: 0,
+    length: datasets.length
+  }
+  const startTime = new Date()
+
+  // Train the model in batches
+  await oraPromise(
+    async ora => {
+      // `addDocumentSync` is synchronous, so plain loops replace the old
+      // `Promise.all` over arrays that never contained promises
+      for (const batch of batches) {
+        for (const dataset of batch) {
+          progress.current++
+          ora.text = `Training ${resolvedSrc} ${progress.current} of ${progress.length} - ${getElapsedTime(startTime)}`
+          ora.render()
+          boox.addDocumentSync(dataset)
+        }
+      }
+    },
+    {
+      text: 'Start training...',
+      successText() {
+        return `Trained ${progress.current} documents in ${getElapsedTime(startTime)}`
+      }
+    }
+  )
+
+  // Compress and save the trained state
+  const compressor = isDeflate ? deflate : gzip
+  const state = JSON.stringify(boox.currentState)
+  const compressedState = compressor(state, {
+    level: DEFAULT_COMPRESSION_LEVEL
+  })
+
+  await oraPromise(
+    async () => {
+      const distDir = dirname(trainedFile)
+
+      // Create the output directory only when it does not exist yet
+      try {
+        await access(distDir, constants.F_OK)
+      } catch {
+        await mkdir(distDir, { recursive: true })
+      }
+
+      return await writeFile(trainedFile, compressedState)
+    },
+    {
+      text: 'Saving...',
+      successText: `Saved ${getDataSize(state)} state to ${trainedFile}`
+    }
+  )
+}
+
+/**
+ * Searches a trained Boox dataset.
+ *
+ * @param src Path to the trained dataset file.
+ * @param query The search query.
+ * @param options Search options.
+ * @returns The value of `Boox.paginateSearchResults` for the requested page.
+ */
+export async function searchDataset(
+  src: string,
+  query: string,
+  {
+    rcname = 'boox',
+    cwd,
+    ...options
+  }: Pick<Options, 'rcname' | 'cwd' | 'isDeflate' | 'modelOptions'> &
+    PageOptions = {}
+) {
+  const resolvedCwd = cwd ? resolve(cwd) : process.cwd()
+  // Load user config from (e.g. boox.config.js) file, if present
+  const userConfig: Options = await loadRc(rcname, resolvedCwd)
+  const {
+    modelOptions,
+    isDeflate = false,
+    offset = 1,
+    length = 10
+  } = { ...options, ...userConfig }
+
+  // `resolve` (unlike `join`) leaves an absolute `src` untouched
+  const resolvedSrc = relative(process.cwd(), resolve(resolvedCwd, src))
+
+  // Pick the decompressor that matches how the state was saved
+  const decompressor = isDeflate ? inflate : ungzip
+
+  // Read trained state from file
+  console.time('Loading state')
+  const compressedState = await readFile(resolvedSrc)
+  const decompressedState = decompressor(compressedState, { to: 'string' })
+  const boox = new Boox({ modelOptions })
+  const state = JSON.parse(decompressedState)
+  // set state
+  boox.currentState = state
+  console.timeEnd('Loading state')
+
+  console.info('State size:', getDataSize(decompressedState))
+
+  // Perform the search
+  // Load user config from (e.g. boox-results.config.js) file, if present
+  const resultsConfig: SearchOptions = await loadRc(
+    'boox-results',
+    resolvedCwd
+  )
+  console.time('Search in')
+  const results = await boox.search(query, resultsConfig)
+  console.timeEnd('Search in')
+  console.log()
+
+  return Boox.paginateSearchResults(results, +offset, +length)
+}
diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts
new file mode 100644
index 0000000..cf23c31
--- /dev/null
+++ b/packages/cli/src/index.ts
@@ -0,0 +1,87 @@
+#!/usr/bin/env node
+
+import { program } from 'commander'
+import { searchDataset, trainDataset } from './api.js'
+
+// boox-cli train <source> [destination]
+program
+  .command('train <source> [destination]')
+  .description('Train a Boox dataset')
+  .option('-i, --id <field>', 'Field to use as document ID', 'id')
+  .option('-f, --features <fields...>', 'Fields to index for search')
+  .option('-a, --attributes <fields...>', 'Fields to include as-is')
+  .option('-d, --deflate', 'Compress the trained data as a ".dat" file', false)
+  .option('-c, --cwd <path>', 'Working directory', process.cwd())
+ 
.option('-r, --rcname <name>', 'Name of the Boox configuration file', 'boox')
+  .action(async (src, dest, { deflate, ...options }) => {
+    try {
+      await trainDataset(src, dest, { isDeflate: deflate, ...options })
+    } catch (error) {
+      console.error(error)
+      process.exit(1)
+    }
+  })
+
+// boox-cli search <source> <query>
+program
+  .command('search <source> <query>')
+  .description('Search a trained Boox dataset')
+  .option('-o, --offset <number>', 'Offset for pagination', '1')
+  .option('-l, --length <number>', 'Number of results per page', '10')
+  .option(
+    '-k, --context <field>',
+    'Display the context instead of paginated results object'
+  )
+  .option(
+    '-a, --attrs <fields...>',
+    'Fields to display when "--context" is provided'
+  )
+  .option(
+    '-d, --deflate',
+    'Assume the trained data is deflated as ".dat" file',
+    false
+  )
+  .option('-c, --cwd <path>', 'Working directory', process.cwd())
+  .option('-r, --rcname <name>', 'Name of the Boox configuration file', 'boox')
+  .action(async (src, query, { context, attrs = [], deflate, ...options }) => {
+    try {
+      const paginateResults = await searchDataset(src, query, {
+        isDeflate: deflate,
+        ...options
+      })
+
+      if (typeof context === 'string') {
+        const { currentPage, totalPages, totalResults, results } =
+          paginateResults
+
+        console.log(
+          `\nPage ${currentPage} of ${totalPages}, Showing ${results.length} of ${totalResults} results\n`
+        )
+        console.log('='.repeat(30), '\n')
+        // The "field::maxlength" spec is loop-invariant, so split it only once
+        const [field, maxlength = 160] = context.split('::')
+        for (const result of results) {
+          const { keywords, text } = result.context(field, +maxlength)
+          // Skip attributes the document lacks instead of printing "undefined"
+          const meta = attrs
+            .filter((attr: string) => result.attributes[attr] != null)
+            .map((attr: string) => `${attr}: ${result.attributes[attr]}`)
+
+          console.log(...meta, Array.from(keywords), '\n')
+          console.log(`${text}...`, '\n')
+          console.log('='.repeat(30), '\n')
+        }
+
+        console.log(
+          `Page ${currentPage} of ${totalPages}, Showing ${results.length} of ${totalResults} results\n`
+        )
+      } else {
+        console.log(paginateResults)
+      }
+    } catch (error) {
+      console.error(error)
+      process.exit(1)
+    }
+  })
+
+program.parse()
diff --git a/packages/cli/src/utils.ts b/packages/cli/src/utils.ts
new file mode 100644
index 0000000..e9055a3
--- /dev/null
+++ b/packages/cli/src/utils.ts
@@ -0,0 +1,81 @@
+/**
+ * Gets the elapsed time since a task started.
+ *
+ * @param startTime The start time.
+ * @returns The elapsed time in human-readable format, or `'0s'` when no start time is given.
+ */
+export function getElapsedTime(startTime: Date) {
+  if (!startTime) {
+    return '0s'
+  }
+
+  const elapsedTime = Date.now() - startTime.getTime()
+  return formatTime(elapsedTime)
+}
+
+/**
+ * Converts time (in milliseconds) into a human-readable format.
+ *
+ * Hour and minute parts are left out when they are zero; seconds are always shown.
+ *
+ * @param time The time in milliseconds.
+ * @returns The formatted time string (e.g., "01m 05s").
+ */
+function formatTime(time: number) {
+  const totalSeconds = Math.floor(time / 1000)
+  const hours = Math.floor(totalSeconds / 3600)
+  const minutes = Math.floor((totalSeconds % 3600) / 60)
+  const seconds = totalSeconds % 60
+
+  const formattedTime = [
+    hours > 0 ? `${hours.toString().padStart(2, '0')}h` : '',
+    minutes > 0 ? `${minutes.toString().padStart(2, '0')}m` : '',
+    `${seconds.toString().padStart(2, '0')}s`
+  ]
+    .filter(Boolean)
+    .join(' ')
+
+  return formattedTime
+}
+
+/**
+ * Calculates the size of a data in bytes and returns it in a human-readable format.
+ *
+ * Strings are measured as UTF-8; other values are measured by their JSON text.
+ *
+ * @param data The data to calculate the size of.
+ * @returns The data size in a human-readable format (e.g., "1.23 MB").
+ */
+export function getDataSize(data: string | object) {
+  // `JSON.stringify` returns `undefined` for values it cannot serialise
+  const dataStr =
+    typeof data === 'string' ? data : (JSON.stringify(data) ?? '')
+  // `String#length` counts UTF-16 code units, not bytes, so it under-reports
+  // any text that contains non-ASCII characters
+  const sizeInBytes = Buffer.byteLength(dataStr, 'utf8')
+  return formatBytes(sizeInBytes)
+}
+
+/**
+ * Converts a number of bytes into a human-readable format.
+ *
+ * @param bytes The number of bytes to format.
+ * @param decimals The number of decimal places to include (optional).
+ * @returns The formatted size string (e.g., "1.23 MB").
+ */
+function formatBytes(bytes: number, decimals = 2) {
+  // Negative and non-finite input would otherwise index outside `sizes`
+  if (!Number.isFinite(bytes) || bytes <= 0) return '0 Bytes'
+
+  const k = 1024
+  const dm = decimals < 0 ? 0 : decimals
+  const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']
+
+  // Clamp the unit index so values below 1 or beyond "YB" stay in range
+  const i = Math.min(
+    Math.max(Math.floor(Math.log(bytes) / Math.log(k)), 0),
+    sizes.length - 1
+  )
+
+  return `${parseFloat((bytes / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`
+}
diff --git a/packages/cli/test/fixtures/boox.config.js b/packages/cli/test/fixtures/boox.config.js
new file mode 100644
index 0000000..85e362c
--- /dev/null
+++ b/packages/cli/test/fixtures/boox.config.js
@@ -0,0 +1,5 @@
+/** @type {import('boox').BooxOptions} */
+export default {
+  features: ['name', 'set_name', 'caption'],
+  attributes: ['image_url', 'hp']
+}
diff --git a/packages/cli/test/fixtures/pokemon-trained.json b/packages/cli/test/fixtures/pokemon-trained.json
new file mode 100644
index 0000000..acbc7af
--- /dev/null
+++ b/packages/cli/test/fixtures/pokemon-trained.json
@@ -0,0 +1 @@
+{"configs":{"id":"id","features":["name","set_name","caption"],"attributes":["image_url","hp"]},"documents":{"pl3-1":{"id":"pl3-1","attributes":{"name":"Absol G","set_name":"Supreme Victors","caption":"A Basic, SP Pokemon Card of type Darkness with the title Absol G and 70 HP of rarity Rare Holo from the set Supreme Victors. It has the attack Feint Attack with the cost Darkness, the energy cost 1 with the description: Choose 1 of your opponent's Pokemon. This attack does 20 damage to that Pokemon. This attack's damage isn't affected by Weakness, Resistance, Poke-Powers, Poke-Bodies, or any other effects on that Pokemon. It has the attack Doom News with the cost Darkness, Colorless, Colorless, the energy cost 3 with the description: Return all Energy cards attached to Absol G to your hand. The Defending Pokemon is Knocked Out at the end of your opponent's next turn. It has weakness against Fighting 2. It has resistance against Psychic -20. 
","image_url":"https://images.pokemontcg.io/pl3/1_hires.png","hp":70},"magnitude":1.070363962559314},"ex12-1":{"id":"ex12-1","attributes":{"name":"Aerodactyl","set_name":"Legend Maker","caption":"A Stage 1 Pokemon Card of type Colorless with the title Aerodactyl and 70 HP of rarity Rare Holo evolved from Mysterious Fossil from the set Legend Maker. It has the attack Power Blow with the cost Colorless, the energy cost 1 and the damage of 10+ with the description: Does 10 damage plus 10 more damage for each Energy attached to Aerodactyl. It has the attack Speed Stroke with the cost Colorless, Colorless, Colorless, the energy cost 3 and the damage of 40 with the description: During your opponent's next turn, prevent all effects, including damage, done to Aerodactyl by attacks from your opponent's Pokemon-ex. It has the ability Reactive Protection with the description: Any damage done to Aerodactyl by attacks from your opponent's Pokemon is reduced by 10 for each React Energy card attached to Aerodactyl (after applying Weakness and Resistance). It has weakness against Lightning 2. It has resistance against Fighting -30. ","image_url":"https://images.pokemontcg.io/ex12/1_hires.png","hp":70},"magnitude":1.2941388259451931},"xy5-1":{"id":"xy5-1","attributes":{"name":"Weedle","set_name":"Primal Clash","caption":"A Basic Pokemon Card of type Grass with the title Weedle and 50 HP of rarity Common from the set Primal Clash and the flavor text: Its poison stinger is very powerful. Its bright-colored body is intended to warn off its enemies. It has the attack Multiply with the cost Grass, the energy cost 1 with the description: Search your deck for Weedle and put it onto your Bench. Shuffle your deck afterward. It has weakness against Fire 2. 
","image_url":"https://images.pokemontcg.io/xy5/1_hires.png","hp":50},"magnitude":1.261075929514161},"mcd19-1":{"id":"mcd19-1","attributes":{"name":"Caterpie","set_name":"McDonald's Collection 2019","caption":"A Basic Pokemon Card of type Grass with the title Caterpie and 50 HP of rarity None from the set McDonald's Collection 2019. It has the attack Surprise Attack with the cost Grass, the energy cost 1 and the damage of 20 with the description: Flip a coin. If tails, this attack does nothing. ","image_url":"https://images.pokemontcg.io/mcd19/1_hires.png","hp":50},"magnitude":1.1878517312935937},"ex7-1":{"id":"ex7-1","attributes":{"name":"Azumarill","set_name":"Team Rocket Returns","caption":"A Stage 1 Pokemon Card of type Water with the title Azumarill and 80 HP of rarity Rare Holo evolved from Marill from the set Team Rocket Returns. It has the attack Water Punch with the cost Water, Colorless, the energy cost 2 and the damage of 20+ with the description: Flip a coin for each Water Energy attached to Azumarill. This attack does 20 damage plus 20 more damage for each heads. It has the ability Froth with the description: Once during your turn, when you play Azumarill from your hand to evolve 1 of your Active Pokemon, you may use this power. Each Defending Pokemon is now Paralyzed. It has weakness against Lightning 2. 
","image_url":"https://images.pokemontcg.io/ex7/1_hires.png","hp":80},"magnitude":1.1959271615210927}},"features":{"name":{"absol":{"pl3-1":0.5},"g":{"pl3-1":0.5},"aerodactyl":{"ex12-1":1},"weedle":{"xy5-1":1},"caterpie":{"mcd19-1":1},"azumarill":{"ex7-1":1}},"set_name":{"2019":{"mcd19-1":0.25},"supreme":{"pl3-1":0.5},"victors":{"pl3-1":0.5},"legend":{"ex12-1":0.5},"maker":{"ex12-1":0.5},"primal":{"xy5-1":0.5},"clash":{"xy5-1":0.5},"mcdonald":{"mcd19-1":0.25},"s":{"mcd19-1":0.25},"collection":{"mcd19-1":0.25},"team":{"ex7-1":0.3333333333333333},"rocket":{"ex7-1":0.3333333333333333},"returns":{"ex7-1":0.3333333333333333}},"caption":{"1":{"pl3-1":0.014814814814814815,"ex12-1":0.012578616352201259,"xy5-1":0.0125,"mcd19-1":0.01818181818181818,"ex7-1":0.017241379310344827},"2":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629,"xy5-1":0.0125,"ex7-1":0.017241379310344827},"3":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629},"10":{"ex12-1":0.025157232704402517},"20":{"pl3-1":0.014814814814814815,"mcd19-1":0.01818181818181818,"ex7-1":0.02586206896551724},"30":{"ex12-1":0.006289308176100629},"40":{"ex12-1":0.006289308176100629},"50":{"xy5-1":0.0125,"mcd19-1":0.01818181818181818},"70":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629},"80":{"ex7-1":0.008620689655172414},"2019":{"mcd19-1":0.01818181818181818},"a":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629,"xy5-1":0.0125,"mcd19-1":0.03636363636363636,"ex7-1":0.017241379310344827},"basic":{"pl3-1":0.007407407407407408,"xy5-1":0.0125,"mcd19-1":0.01818181818181818},"sp":{"pl3-1":0.007407407407407408},"pokemon":{"pl3-1":0.037037037037037035,"ex12-1":0.018867924528301886,"xy5-1":0.0125,"mcd19-1":0.01818181818181818,"ex7-1":0.02586206896551724},"card":{"pl3-1":0.007407407407407408,"ex12-1":0.012578616352201259,"xy5-1":0.0125,"mcd19-1":0.01818181818181818,"ex7-1":0.008620689655172414},"of":{"pl3-1":0.02962962962962963,"ex12-1":0.025157232704402517,"xy5-1":0.025,"mcd19-1":0.05454545454545
454,"ex7-1":0.034482758620689655},"type":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629,"xy5-1":0.0125,"mcd19-1":0.01818181818181818,"ex7-1":0.008620689655172414},"darkness":{"pl3-1":0.022222222222222223},"with":{"pl3-1":0.037037037037037035,"ex12-1":0.03773584905660377,"xy5-1":0.0375,"mcd19-1":0.05454545454545454,"ex7-1":0.034482758620689655},"the":{"pl3-1":0.08888888888888889,"ex12-1":0.0880503144654088,"xy5-1":0.0875,"mcd19-1":0.12727272727272726,"ex7-1":0.07758620689655173},"title":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629,"xy5-1":0.0125,"mcd19-1":0.01818181818181818,"ex7-1":0.008620689655172414},"absol":{"pl3-1":0.014814814814814815},"g":{"pl3-1":0.014814814814814815},"and":{"pl3-1":0.007407407407407408,"ex12-1":0.025157232704402517,"xy5-1":0.0375,"mcd19-1":0.03636363636363636,"ex7-1":0.017241379310344827},"hp":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629,"xy5-1":0.0125,"mcd19-1":0.01818181818181818,"ex7-1":0.008620689655172414},"rarity":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629,"xy5-1":0.0125,"mcd19-1":0.01818181818181818,"ex7-1":0.008620689655172414},"rare":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629,"ex7-1":0.008620689655172414},"holo":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629,"ex7-1":0.008620689655172414},"from":{"pl3-1":0.007407407407407408,"ex12-1":0.025157232704402517,"xy5-1":0.0125,"mcd19-1":0.01818181818181818,"ex7-1":0.02586206896551724},"set":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629,"xy5-1":0.0125,"mcd19-1":0.01818181818181818,"ex7-1":0.008620689655172414},"supreme":{"pl3-1":0.007407407407407408},"victors":{"pl3-1":0.007407407407407408},"it":{"pl3-1":0.02962962962962963,"ex12-1":0.031446540880503145,"xy5-1":0.0375,"mcd19-1":0.01818181818181818,"ex7-1":0.02586206896551724},"has":{"pl3-1":0.02962962962962963,"ex12-1":0.031446540880503145,"xy5-1":0.025,"mcd19-1":0.01818181818181818,"ex7-1":0.02586206896551724},"attack":{"pl3-1":0.03703703
7037037035,"ex12-1":0.012578616352201259,"xy5-1":0.0125,"mcd19-1":0.05454545454545454,"ex7-1":0.017241379310344827},"feint":{"pl3-1":0.007407407407407408},"cost":{"pl3-1":0.02962962962962963,"ex12-1":0.025157232704402517,"xy5-1":0.025,"mcd19-1":0.03636363636363636,"ex7-1":0.017241379310344827},"energy":{"pl3-1":0.022222222222222223,"ex12-1":0.025157232704402517,"xy5-1":0.0125,"mcd19-1":0.01818181818181818,"ex7-1":0.017241379310344827},"description":{"pl3-1":0.014814814814814815,"ex12-1":0.018867924528301886,"xy5-1":0.0125,"mcd19-1":0.01818181818181818,"ex7-1":0.017241379310344827},"choose":{"pl3-1":0.007407407407407408},"your":{"pl3-1":0.022222222222222223,"ex12-1":0.018867924528301886,"xy5-1":0.0375,"ex7-1":0.02586206896551724},"opponent":{"pl3-1":0.014814814814814815,"ex12-1":0.018867924528301886},"s":{"pl3-1":0.022222222222222223,"ex12-1":0.018867924528301886,"mcd19-1":0.01818181818181818},"this":{"pl3-1":0.014814814814814815,"mcd19-1":0.01818181818181818,"ex7-1":0.017241379310344827},"does":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629,"mcd19-1":0.01818181818181818,"ex7-1":0.008620689655172414},"damage":{"pl3-1":0.014814814814814815,"ex12-1":0.03773584905660377,"mcd19-1":0.01818181818181818,"ex7-1":0.02586206896551724},"to":{"pl3-1":0.022222222222222223,"ex12-1":0.025157232704402517,"xy5-1":0.0125,"ex7-1":0.017241379310344827},"that":{"pl3-1":0.014814814814814815},"isn":{"pl3-1":0.007407407407407408},"t":{"pl3-1":0.007407407407407408},"affected":{"pl3-1":0.007407407407407408},"by":{"pl3-1":0.007407407407407408,"ex12-1":0.018867924528301886},"weakness":{"pl3-1":0.014814814814814815,"ex12-1":0.012578616352201259,"xy5-1":0.0125,"ex7-1":0.008620689655172414},"resistance":{"pl3-1":0.014814814814814815,"ex12-1":0.012578616352201259},"poke":{"pl3-1":0.014814814814814815},"powers":{"pl3-1":0.007407407407407408},"bodies":{"pl3-1":0.007407407407407408},"or":{"pl3-1":0.007407407407407408},"any":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629},"oth
er":{"pl3-1":0.007407407407407408},"effects":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629},"on":{"pl3-1":0.007407407407407408},"doom":{"pl3-1":0.007407407407407408},"news":{"pl3-1":0.007407407407407408},"colorless":{"pl3-1":0.014814814814814815,"ex12-1":0.031446540880503145,"ex7-1":0.008620689655172414},"return":{"pl3-1":0.007407407407407408},"all":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629},"cards":{"pl3-1":0.007407407407407408},"attached":{"pl3-1":0.007407407407407408,"ex12-1":0.012578616352201259,"ex7-1":0.008620689655172414},"hand":{"pl3-1":0.007407407407407408,"ex7-1":0.008620689655172414},"defending":{"pl3-1":0.007407407407407408,"ex7-1":0.008620689655172414},"is":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629,"xy5-1":0.025,"ex7-1":0.008620689655172414},"knocked":{"pl3-1":0.007407407407407408},"out":{"pl3-1":0.007407407407407408},"at":{"pl3-1":0.007407407407407408},"end":{"pl3-1":0.007407407407407408},"next":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629},"turn":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629,"ex7-1":0.008620689655172414},"against":{"pl3-1":0.014814814814814815,"ex12-1":0.012578616352201259,"xy5-1":0.0125,"ex7-1":0.008620689655172414},"fighting":{"pl3-1":0.007407407407407408,"ex12-1":0.006289308176100629},"psychic":{"pl3-1":0.007407407407407408},"stage":{"ex12-1":0.006289308176100629,"ex7-1":0.008620689655172414},"aerodactyl":{"ex12-1":0.031446540880503145},"evolved":{"ex12-1":0.006289308176100629,"ex7-1":0.008620689655172414},"mysterious":{"ex12-1":0.006289308176100629},"fossil":{"ex12-1":0.006289308176100629},"legend":{"ex12-1":0.006289308176100629},"maker":{"ex12-1":0.006289308176100629},"power":{"ex12-1":0.006289308176100629,"ex7-1":0.008620689655172414},"blow":{"ex12-1":0.006289308176100629},"plus":{"ex12-1":0.006289308176100629,"ex7-1":0.008620689655172414},"more":{"ex12-1":0.006289308176100629,"ex7-1":0.008620689655172414},"for":{"ex12-1":0.012578616352201259,"xy5-1":
0.0125,"ex7-1":0.017241379310344827},"each":{"ex12-1":0.012578616352201259,"ex7-1":0.02586206896551724},"speed":{"ex12-1":0.006289308176100629},"stroke":{"ex12-1":0.006289308176100629},"during":{"ex12-1":0.006289308176100629,"ex7-1":0.008620689655172414},"prevent":{"ex12-1":0.006289308176100629},"including":{"ex12-1":0.006289308176100629},"done":{"ex12-1":0.012578616352201259},"attacks":{"ex12-1":0.012578616352201259},"ex":{"ex12-1":0.006289308176100629},"ability":{"ex12-1":0.006289308176100629,"ex7-1":0.008620689655172414},"reactive":{"ex12-1":0.006289308176100629},"protection":{"ex12-1":0.006289308176100629},"reduced":{"ex12-1":0.006289308176100629},"react":{"ex12-1":0.006289308176100629},"after":{"ex12-1":0.006289308176100629},"applying":{"ex12-1":0.006289308176100629},"lightning":{"ex12-1":0.006289308176100629,"ex7-1":0.008620689655172414},"grass":{"xy5-1":0.025,"mcd19-1":0.03636363636363636},"weedle":{"xy5-1":0.025},"common":{"xy5-1":0.0125},"primal":{"xy5-1":0.0125},"clash":{"xy5-1":0.0125},"flavor":{"xy5-1":0.0125},"text":{"xy5-1":0.0125},"its":{"xy5-1":0.0375},"poison":{"xy5-1":0.0125},"stinger":{"xy5-1":0.0125},"very":{"xy5-1":0.0125},"powerful":{"xy5-1":0.0125},"bright":{"xy5-1":0.0125},"colored":{"xy5-1":0.0125},"body":{"xy5-1":0.0125},"intended":{"xy5-1":0.0125},"warn":{"xy5-1":0.0125},"off":{"xy5-1":0.0125},"enemies":{"xy5-1":0.0125},"multiply":{"xy5-1":0.0125},"search":{"xy5-1":0.0125},"deck":{"xy5-1":0.025},"put":{"xy5-1":0.0125},"onto":{"xy5-1":0.0125},"bench":{"xy5-1":0.0125},"shuffle":{"xy5-1":0.0125},"afterward":{"xy5-1":0.0125},"fire":{"xy5-1":0.0125},"caterpie":{"mcd19-1":0.01818181818181818},"none":{"mcd19-1":0.01818181818181818},"mcdonald":{"mcd19-1":0.01818181818181818},"collection":{"mcd19-1":0.01818181818181818},"surprise":{"mcd19-1":0.01818181818181818},"flip":{"mcd19-1":0.01818181818181818,"ex7-1":0.008620689655172414},"coin":{"mcd19-1":0.01818181818181818,"ex7-1":0.008620689655172414},"if":{"mcd19-1":0.01818181818181818},"tails":{"mcd19-
1":0.01818181818181818},"nothing":{"mcd19-1":0.01818181818181818},"water":{"ex7-1":0.034482758620689655},"azumarill":{"ex7-1":0.02586206896551724},"marill":{"ex7-1":0.008620689655172414},"team":{"ex7-1":0.008620689655172414},"rocket":{"ex7-1":0.008620689655172414},"returns":{"ex7-1":0.008620689655172414},"punch":{"ex7-1":0.008620689655172414},"heads":{"ex7-1":0.008620689655172414},"froth":{"ex7-1":0.008620689655172414},"once":{"ex7-1":0.008620689655172414},"when":{"ex7-1":0.008620689655172414},"you":{"ex7-1":0.017241379310344827},"play":{"ex7-1":0.008620689655172414},"evolve":{"ex7-1":0.008620689655172414},"active":{"ex7-1":0.008620689655172414},"may":{"ex7-1":0.008620689655172414},"use":{"ex7-1":0.008620689655172414},"now":{"ex7-1":0.008620689655172414},"paralyzed":{"ex7-1":0.008620689655172414}}},"popularSearches":{}} \ No newline at end of file diff --git a/packages/cli/test/fixtures/pokemon.json b/packages/cli/test/fixtures/pokemon.json new file mode 100644 index 0000000..3aaa15d --- /dev/null +++ b/packages/cli/test/fixtures/pokemon.json @@ -0,0 +1,42 @@ +[ + { + "id": "pl3-1", + "image_url": "https://images.pokemontcg.io/pl3/1_hires.png", + "caption": "A Basic, SP Pokemon Card of type Darkness with the title Absol G and 70 HP of rarity Rare Holo from the set Supreme Victors. It has the attack Feint Attack with the cost Darkness, the energy cost 1 with the description: Choose 1 of your opponent's Pokemon. This attack does 20 damage to that Pokemon. This attack's damage isn't affected by Weakness, Resistance, Poke-Powers, Poke-Bodies, or any other effects on that Pokemon. It has the attack Doom News with the cost Darkness, Colorless, Colorless, the energy cost 3 with the description: Return all Energy cards attached to Absol G to your hand. The Defending Pokemon is Knocked Out at the end of your opponent's next turn. It has weakness against Fighting 2. It has resistance against Psychic -20. 
", + "name": "Absol G", + "hp": 70, + "set_name": "Supreme Victors" + }, + { + "id": "ex12-1", + "image_url": "https://images.pokemontcg.io/ex12/1_hires.png", + "caption": "A Stage 1 Pokemon Card of type Colorless with the title Aerodactyl and 70 HP of rarity Rare Holo evolved from Mysterious Fossil from the set Legend Maker. It has the attack Power Blow with the cost Colorless, the energy cost 1 and the damage of 10+ with the description: Does 10 damage plus 10 more damage for each Energy attached to Aerodactyl. It has the attack Speed Stroke with the cost Colorless, Colorless, Colorless, the energy cost 3 and the damage of 40 with the description: During your opponent's next turn, prevent all effects, including damage, done to Aerodactyl by attacks from your opponent's Pokemon-ex. It has the ability Reactive Protection with the description: Any damage done to Aerodactyl by attacks from your opponent's Pokemon is reduced by 10 for each React Energy card attached to Aerodactyl (after applying Weakness and Resistance). It has weakness against Lightning 2. It has resistance against Fighting -30. ", + "name": "Aerodactyl", + "hp": 70, + "set_name": "Legend Maker" + }, + { + "id": "xy5-1", + "image_url": "https://images.pokemontcg.io/xy5/1_hires.png", + "caption": "A Basic Pokemon Card of type Grass with the title Weedle and 50 HP of rarity Common from the set Primal Clash and the flavor text: Its poison stinger is very powerful. Its bright-colored body is intended to warn off its enemies. It has the attack Multiply with the cost Grass, the energy cost 1 with the description: Search your deck for Weedle and put it onto your Bench. Shuffle your deck afterward. It has weakness against Fire 2. 
", + "name": "Weedle", + "hp": 50, + "set_name": "Primal Clash" + }, + { + "id": "mcd19-1", + "image_url": "https://images.pokemontcg.io/mcd19/1_hires.png", + "caption": "A Basic Pokemon Card of type Grass with the title Caterpie and 50 HP of rarity None from the set McDonald's Collection 2019. It has the attack Surprise Attack with the cost Grass, the energy cost 1 and the damage of 20 with the description: Flip a coin. If tails, this attack does nothing. ", + "name": "Caterpie", + "hp": 50, + "set_name": "McDonald's Collection 2019" + }, + { + "id": "ex7-1", + "image_url": "https://images.pokemontcg.io/ex7/1_hires.png", + "caption": "A Stage 1 Pokemon Card of type Water with the title Azumarill and 80 HP of rarity Rare Holo evolved from Marill from the set Team Rocket Returns. It has the attack Water Punch with the cost Water, Colorless, the energy cost 2 and the damage of 20+ with the description: Flip a coin for each Water Energy attached to Azumarill. This attack does 20 damage plus 20 more damage for each heads. It has the ability Froth with the description: Once during your turn, when you play Azumarill from your hand to evolve 1 of your Active Pokemon, you may use this power. Each Defending Pokemon is now Paralyzed. It has weakness against Lightning 2. 
/* eslint-disable @typescript-eslint/no-explicit-any */
///

import { readFile, rm } from 'node:fs/promises'
import { inflate, ungzip } from 'pako'
import { searchDataset, trainDataset } from '../src/api.js'
import pokemonTrained from './fixtures/pokemon-trained.json'

// Directory holding the input dataset and the artifacts written during the run.
const FIXTURES_DIR = 'test/fixtures'

// Silence progress/timing output so the test log stays readable.
vi.spyOn(process.stderr, 'write').mockImplementation(vi.fn())
vi.spyOn(console, 'time').mockImplementation(vi.fn())
vi.spyOn(console, 'timeEnd').mockImplementation(vi.fn())

beforeEach(() => {
  vi.clearAllMocks()
})

afterAll(async () => {
  // `force: true` keeps cleanup from throwing ENOENT when a train test failed
  // before writing its artifact, which would otherwise bury the real failure.
  await Promise.all([
    rm(`${FIXTURES_DIR}/pokemon-trained.dat`, { force: true }),
    rm(`${FIXTURES_DIR}/pokemon-trained.gz`, { force: true })
  ])
})

describe('train', () => {
  it('should train the dataset and save the trained data', async () => {
    await trainDataset('pokemon.json', '', {
      cwd: FIXTURES_DIR
    })

    // Default output is read back as gzip and compared with the JSON fixture.
    const trainedDataset = ungzip(
      await readFile(`${FIXTURES_DIR}/pokemon-trained.gz`),
      { to: 'string' }
    )
    const state = JSON.parse(trainedDataset)
    expect(state).toEqual(pokemonTrained)
  })

  it('should train the dataset and save the trained data with --deflate flag', async () => {
    await trainDataset('pokemon.json', '', {
      cwd: FIXTURES_DIR,
      isDeflate: true
    })

    // With `isDeflate` the output is read back with `inflate` from the .dat file.
    const trainedDataset = inflate(
      await readFile(`${FIXTURES_DIR}/pokemon-trained.dat`),
      { to: 'string' }
    )
    const state = JSON.parse(trainedDataset)
    expect(state).toEqual(pokemonTrained)
  })

  it('throws an error for invalid dataset path', async () => {
    await expect(trainDataset('invalid.json', 'missing')).rejects.toThrowError()
  })
})

// NOTE(review): these tests read the .gz/.dat files that appear to be produced
// by the `train` suite above, so they presumably rely on it running first.
describe('search', () => {
  it('searches a trained dataset and returns results', async () => {
    const { currentPage, results, totalPages, totalResults } =
      await searchDataset('pokemon-trained.gz', 'pokemon', {
        cwd: FIXTURES_DIR,
        offset: '2',
        length: '3'
      })

    expect(currentPage).toBe(2)
    expect(results).toHaveLength(2)
    expect(totalPages).toBe(2)
    expect(totalResults).toBe(5)
  })

  it('searches a deflated trained dataset and returns results', async () => {
    const { currentPage, results, totalPages, totalResults } =
      await searchDataset('pokemon-trained.dat', 'grass caterpie', {
        cwd: FIXTURES_DIR,
        isDeflate: true
      })

    expect(currentPage).toBe(1)
    expect(results).toHaveLength(2)
    expect(totalPages).toBe(1)
    expect(totalResults).toBe(2)
  })

  it('throws an error for invalid trained data path', async () => {
    await expect(searchDataset('invalid.dat', 'missing')).rejects.toThrowError()
  })
})
:------------------------------------------------------------------------- | | [core](packages/core) | The Boox package. | [![npm](https://img.shields.io/npm/v/boox)](packages/core/changelog.md) | +| [cli](packages/cli) | CLI tool to train your datasets with Boox. | [![npm](https://img.shields.io/npm/v/boox-cli)](packages/cli/changelog.md) | ## Documentation