From 7a200094d69ddc2c5e201a9df79845e79c2d58e9 Mon Sep 17 00:00:00 2001 From: Soichiro Miki Date: Thu, 4 Apr 2024 20:48:14 +0900 Subject: [PATCH 1/4] Introduce multiple file translation --- env-example | 3 + src/index.ts | 138 ++++++++++++++++++++++++++------------- src/loadConfig.ts | 17 +++++ src/utils/error-utils.ts | 2 +- src/utils/fs-utils.ts | 45 ++++++++----- 5 files changed, 143 insertions(+), 62 deletions(-) diff --git a/env-example b/env-example index e90525a..3efd26a 100644 --- a/env-example +++ b/env-example @@ -40,5 +40,8 @@ OPENAI_API_KEY="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxx" # Transforms the input path to the output file path. # OUTPUT_FILE_PATTERN="" +# What to do when the output file already exists. One of "overwrite", "skip" and "abort". +# OVERWRITE_POLICY="overwrite" + # Custom API address, to integrate with a third-party API service provider. # API_ENDPOINT="https://api.openai.com/v1/chat/completions" \ No newline at end of file diff --git a/src/index.ts b/src/index.ts index 41e40a1..a5db470 100644 --- a/src/index.ts +++ b/src/index.ts @@ -5,9 +5,10 @@ import * as path from 'node:path'; import dashdash from 'dashdash'; import pc from 'picocolors'; import configureApiCaller from './api.js'; -import { loadConfig } from './loadConfig.js'; +import { type Config, loadConfig } from './loadConfig.js'; import { type DoneStatus, type Status, statusToText } from './status.js'; import { translateMultiple } from './translate.js'; +import { isMessageError } from './utils/error-utils.js'; import formatTime from './utils/formatTime.js'; import { checkFileWritable, @@ -20,47 +21,12 @@ import { splitStringAtBlankLines } from './utils/md-utils.js'; -const options = [ - { names: ['model', 'm'], type: 'string', help: 'Model to use.' }, - { names: ['fragment-size', 'f'], type: 'number', help: 'Fragment size.' }, - { names: ['temperature', 't'], type: 'number', help: 'Temperature.' }, - { names: ['interval', 'i'], type: 'number', help: 'API call interval.' 
}, - { names: ['quiet', 'q'], type: 'bool', help: 'Suppress status messages.' }, - { names: ['out', 'o'], type: 'string', help: 'Output file.' }, - { - names: ['out-suffix'], - type: 'string', - help: 'Output file suffix.', - hidden: true - }, - { names: ['help', 'h'], type: 'bool', help: 'Print this help.' } -]; - -const main = async () => { - const parser = dashdash.createParser({ options }); - const args = parser.parse(); - - if (args.help || args._args.length !== 1) { - if (args._args.length !== 1) - console.log(pc.red('Specify one (and only one) markdown file.')); - console.log(pc.yellow('Usage: chatgpt-md-translator [options] ')); - console.log(parser.help()); - console.log('Docs: https://github.com/smikitky/chatgpt-md-translator\n'); - return; - } - - const { config, warnings } = await loadConfig(args); - for (const warning of warnings) - console.error(pc.bgYellow('Warn'), pc.yellow(warning)); - - const file = args._args[0]; - const filePath = path.resolve(config.baseDir ?? process.cwd(), file); - const markdown = await readTextFile(filePath); - - const outFile = config.out - ? path.resolve(config.baseDir ?? 
process.cwd(), config.out) - : resolveOutFilePath(filePath, config.baseDir, config.outputFilePattern); - await checkFileWritable(outFile); +const translateFile = async ( + inFile: string, + outFile: string, + config: Config +) => { + const markdown = await readTextFile(inFile); const { output: replacedMd, codeBlocks } = replaceCodeBlocks( markdown, @@ -73,8 +39,8 @@ const main = async () => { let status: Status = { status: 'pending', lastToken: '' }; - console.log(pc.cyan(`Translating: ${filePath}`)); - if (filePath !== outFile) console.log(pc.cyan(`To: ${outFile}`)); + console.log(pc.cyan(`Translating: ${inFile}`)); + if (inFile !== outFile) console.log(pc.cyan(`To: ${outFile}`)); console.log( pc.bold('Model:'), @@ -119,6 +85,90 @@ const main = async () => { console.log(`File saved as ${outFile}.`); }; +const options = [ + { names: ['model', 'm'], type: 'string', help: 'Model to use.' }, + { names: ['fragment-size', 'f'], type: 'number', help: 'Fragment size.' }, + { names: ['temperature', 't'], type: 'number', help: 'Temperature.' }, + { names: ['interval', 'i'], type: 'number', help: 'API call interval.' }, + { names: ['quiet', 'q'], type: 'bool', help: 'Suppress status messages.' }, + { names: ['out', 'o'], type: 'string', help: 'Output file.' }, + { + names: ['out-suffix'], + type: 'string', + help: 'Output file suffix.', + hidden: true + }, + { + names: ['overwrite-policy', 'w'], + type: 'string', + help: 'File overwrite policy.' + }, + { names: ['help', 'h'], type: 'bool', help: 'Print this help.' 
} +]; + +const main = async () => { + const parser = dashdash.createParser({ options }); + const args = parser.parse(); + + if (args.help || args._args.length < 1) { + if (args._args.length < 1) + console.log(pc.red('No input files are specified.')); + console.log(pc.yellow('Usage: chatgpt-md-translator [options] ')); + console.log(parser.help()); + console.log('Docs: https://github.com/smikitky/chatgpt-md-translator\n'); + return; + } + + const { config, warnings } = await loadConfig(args); + for (const warning of warnings) + console.error(pc.bgYellow('Warn'), pc.yellow(warning)); + + if (args._args.length > 1 && typeof config.out === 'string') { + throw new Error( + 'You cannot specify output file name when translating multiple files. ' + + 'Use OUTPUT_FILE_PATTERN instead.' + ); + } + + const pathMap = new Map(); + for (const file of args._args) { + const inFile = path.resolve(config.baseDir ?? process.cwd(), file); + const outFile = config.out + ? path.resolve(config.baseDir ?? process.cwd(), config.out) + : resolveOutFilePath(inFile, config.baseDir, config.outputFilePattern); + + if (pathMap.has(inFile)) throw new Error(`Duplicate input file: ${inFile}`); + if (Array.from(pathMap.values()).includes(outFile)) + throw new Error( + `Multiple files are being translated to the same output: ${outFile}` + ); + + pathMap.set(inFile, outFile); + } + + for (const [inFile, outFile] of pathMap) { + try { + await checkFileWritable(outFile, config.overwritePolicy !== 'overwrite'); + await translateFile(inFile, outFile, config); + } catch (e: unknown) { + if (isMessageError(e) && e.message.startsWith('File already exists')) { + if (config.overwritePolicy === 'skip') { + console.error( + pc.bgCyan('Info'), + `Skipping file because output already exists: ${inFile}` + ); + continue; + } + throw e; // This will exit the loop + } + console.error( + pc.bgRed('Error'), + pc.red(e instanceof Error ? 
e.message : 'Unknown error') + ); + } + } +}; + main().catch(err => { console.error(pc.bgRed('Error'), pc.red(err.message)); console.error(pc.gray(err.stack.split('\n').slice(1).join('\n'))); diff --git a/src/loadConfig.ts b/src/loadConfig.ts index 21915f3..159efba 100644 --- a/src/loadConfig.ts +++ b/src/loadConfig.ts @@ -7,6 +7,9 @@ import { readTextFile } from './utils/fs-utils.js'; const homeDir = os.homedir(); +const overwritePolicies = ['skip', 'abort', 'overwrite'] as const; +export type OverwritePolicy = (typeof overwritePolicies)[number]; + export interface Config { apiEndpoint: string; apiKey: string; @@ -20,6 +23,7 @@ export interface Config { codeBlockPreservationLines: number; out: string | null; outputFilePattern: string | null; + overwritePolicy: OverwritePolicy; httpsProxy?: string; } @@ -92,6 +96,15 @@ export const loadConfig = async (args: { warnings.push('OUT_SUFFIX is deprecated. Use OUTPUT_FILE_PATTERN instead.'); } + const checkOverwritePolicy = (input: unknown): OverwritePolicy | null => { + if (typeof input === 'string') { + if (overwritePolicies.includes(input as OverwritePolicy)) + return input as OverwritePolicy; + throw new Error(`Invalid overwrite policy: ${input}`); + } + return null; + }; + const config = { apiEndpoint: conf.API_ENDPOINT ?? 'https://api.openai.com/v1/chat/completions', @@ -113,6 +126,10 @@ export const loadConfig = async (args: { (conf.OUTPUT_FILE_PATTERN?.length > 0 ? conf.OUTPUT_FILE_PATTERN : null) ?? (outSuffix ? `{main}${outSuffix}` : null), + overwritePolicy: + checkOverwritePolicy(args.overwrite_policy) ?? + checkOverwritePolicy(conf.OVERWRITE_POLICY) ?? + 'overwrite', httpsProxy: conf.HTTPS_PROXY ?? 
process.env.HTTPS_PROXY }; diff --git a/src/utils/error-utils.ts b/src/utils/error-utils.ts index 45ed53f..2a74f2e 100644 --- a/src/utils/error-utils.ts +++ b/src/utils/error-utils.ts @@ -3,7 +3,7 @@ import typeUtils from 'node:util/types'; export const isNodeException = ( error: unknown ): error is NodeJS.ErrnoException => { - return typeUtils.isNativeError(error); + return typeUtils.isNativeError(error) && 'code' in error && 'errno' in error; }; export const isMessageError = ( diff --git a/src/utils/fs-utils.ts b/src/utils/fs-utils.ts index ad3ad48..3138401 100644 --- a/src/utils/fs-utils.ts +++ b/src/utils/fs-utils.ts @@ -21,29 +21,40 @@ export const readTextFile = async (filePath: string): Promise => { } }; -export const checkFileWritable = async (filePath: string): Promise => { +export const checkDirectoryWritable = async ( + dirPath: string +): Promise => { + try { + await fs.access(dirPath, fs.constants.F_OK | fs.constants.W_OK); + } catch (dirError) { + if (!isNodeException(dirError)) throw dirError; + switch (dirError.code) { + case 'ENOENT': + throw new Error(`Directory does not exist: ${dirPath}`); + case 'EACCES': + throw new Error(`Directory is not writable: ${dirPath}`); + default: + throw dirError; + } + } +}; + +export const checkFileWritable = async ( + filePath: string, + throwOnOverwrite: boolean +): Promise => { try { await fs.access(filePath, fs.constants.F_OK | fs.constants.W_OK); // The file exists but can be overwritten + if (throwOnOverwrite) throw new Error(`File already exists: ${filePath}`); return; - } catch (fileError) { - if (!isNodeException(fileError)) throw fileError; - if (fileError.code === 'ENOENT') { + } catch (e) { + if (!isNodeException(e)) throw e; + if (e.code === 'ENOENT') { // The file does not exist, check if directory is writable const dirPath = path.dirname(filePath); - try { - await fs.access(dirPath, fs.constants.F_OK | fs.constants.W_OK); - // Directory exists and is writable - return; - } catch (dirError) { - if 
(!isNodeException(dirError)) throw dirError; - if (dirError.code === 'ENOENT') { - // Directory does not exist - throw new Error(`Directory does not exist: ${dirPath}`); - } - // Directory exists but is not writable, or other errors - throw new Error(`Directory is not writable: ${dirPath}`); - } + await checkDirectoryWritable(dirPath); + return; } // File exists but is not writable, or other errors throw new Error(`File is not writable: ${filePath}`); From 1b7799113d4d0c66379bce2d630789d03030cb97 Mon Sep 17 00:00:00 2001 From: Soichiro Miki Date: Thu, 4 Apr 2024 21:35:58 +0900 Subject: [PATCH 2/4] Print model/temperature only once --- src/index.ts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/index.ts b/src/index.ts index a5db470..2fd478e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -41,14 +41,7 @@ const translateFile = async ( console.log(pc.cyan(`Translating: ${inFile}`)); if (inFile !== outFile) console.log(pc.cyan(`To: ${outFile}`)); - - console.log( - pc.bold('Model:'), - config.model, - pc.bold('Temperature:'), - String(config.temperature), - '\n' - ); + console.log(''); // empty line const printStatus = () => { if (config.quiet) return; @@ -82,7 +75,7 @@ const translateFile = async ( await fs.writeFile(outFile, finalResult, 'utf-8'); console.log(pc.green(`Translation completed in ${formatTime(elapsedTime)}.`)); - console.log(`File saved as ${outFile}.`); + console.log(`File saved as ${outFile}.\n`); }; const options = [ @@ -146,6 +139,13 @@ const main = async () => { pathMap.set(inFile, outFile); } + console.log( + pc.bold('Model:'), + config.model, + pc.bold('Temperature:'), + String(config.temperature) + ); + for (const [inFile, outFile] of pathMap) { try { await checkFileWritable(outFile, config.overwritePolicy !== 'overwrite'); From e1c6891c92e590da52648db0c0634911ced358f5 Mon Sep 17 00:00:00 2001 From: Soichiro Miki Date: Thu, 4 Apr 2024 21:42:23 +0900 Subject: [PATCH 3/4] Explain overwrite policy in README 
--- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index b68b27c..422b18e 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,8 @@ By default, the content of the input file will be overwritten with the translate Alternatively, you can directly specify the output file name in command line, like `-o translated.md` or `--out=translated.md`. The path will be relative to the current directory (or `BASE_DIR` if it's defined in the config file). +If you are translating many files, consider using the `OVERWRITE_POLICY` option as well to skip already translated files. + ## CLI Options These can be used to override the settings in the config file. @@ -111,6 +113,7 @@ Example: `markdown-gpt-translator -m 4 -f 1000 learn/thinking-in-react.md` - `-t NUM`, `--temperature=NUM`: Sets the "temperature", or the randomness of the output. - `-i NUM`, `--interval=NUM`: Sets the API call interval. - `-o NAME`, `--out=NAME`: Explicitly sets the output file name. If set, the `OUTPUT_FILE_PATTERN` setting will be ignored. +- `-w ARG`, `--overwrite-policy=ARG`: Determines what happens when the output file already exists. One of "overwrite" (default), "skip", and "abort". 
## Limitations and Pitfalls From 130063d469b808309caf5b765518690af7f36ef6 Mon Sep 17 00:00:00 2001 From: Soichiro Miki Date: Thu, 4 Apr 2024 21:44:10 +0900 Subject: [PATCH 4/4] Update command line help message --- src/index.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/index.ts b/src/index.ts index 2fd478e..789fd2c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -106,7 +106,9 @@ const main = async () => { if (args.help || args._args.length < 1) { if (args._args.length < 1) console.log(pc.red('No input files are specified.')); - console.log(pc.yellow('Usage: chatgpt-md-translator [options] ')); + console.log( + pc.yellow('Usage: chatgpt-md-translator [options] [...]') + ); console.log(parser.help()); console.log('Docs: https://github.com/smikitky/chatgpt-md-translator\n'); return;