diff --git a/.vitepress/config/apiReferenceSidebar.ts b/.vitepress/config/apiReferenceSidebar.ts index b8a6fc49..69fead26 100644 --- a/.vitepress/config/apiReferenceSidebar.ts +++ b/.vitepress/config/apiReferenceSidebar.ts @@ -10,6 +10,7 @@ const categoryOrder = [ const functionsOrder = [ "getLlama", + "resolveModelFile", "defineChatSessionFunction", "createModelDownloader", "resolveChatWrapper", diff --git a/docs/cli/pull.md b/docs/cli/pull.md index 461dfb34..607ffa6e 100644 --- a/docs/cli/pull.md +++ b/docs/cli/pull.md @@ -13,10 +13,16 @@ const commandDoc = docs.pull; A wrapper around [`ipull`](https://www.npmjs.com/package/ipull) to download model files as fast as possible with parallel connections and other optimizations. -Automatically handles split and binary-split models files, so only pass the URL to the first file of a model. +Automatically handles split and binary-split models files, so only pass the URI to the first file of a model. If a file already exists and its size matches the expected size, it will not be downloaded again unless the `--override` flag is used. +The supported URI schemes are: +- **HTTP:** `https://`, `http://` +- **Hugging Face:** `hf://#` (`#` is optional) + +Learn more about using model URIs in the [Downloading Models guide](../guide/downloading-models.md#model-uris). + > To programmatically download a model file in your code, use [`createModelDownloader()`](../api/functions/createModelDownloader.md) ## Usage diff --git a/docs/guide/choosing-a-model.md b/docs/guide/choosing-a-model.md index 9740b23b..d1a4891a 100644 --- a/docs/guide/choosing-a-model.md +++ b/docs/guide/choosing-a-model.md @@ -164,3 +164,7 @@ npx --no node-llama-cpp pull --dir ./models > > If the model file URL is of a single part of a multi-part model (for example, [this model](https://huggingface.co/bartowski/Meta-Llama-3-70B-Instruct-GGUF/blob/main/Meta-Llama-3-70B-Instruct-Q5_K_L.gguf/Meta-Llama-3-70B-Instruct-Q5_K_L-00001-of-00002.gguf)), > it will also download all the other parts as well into the same directory. + +::: tip +Consider using [model URIs](./downloading-models.md#model-uris) to download and load models. +::: diff --git a/docs/guide/downloading-models.md b/docs/guide/downloading-models.md index d9b3a099..1a112b49 100644 --- a/docs/guide/downloading-models.md +++ b/docs/guide/downloading-models.md @@ -69,6 +69,50 @@ This option is recommended for more advanced use cases, such as downloading mode If you know the exact model URLs you're going to need every time in your project, it's better to download the models automatically after running `npm install` as described in the [Using the CLI](#cli) section. +## Model URIs {#model-uris} +You can reference models using a URI instead of their full download URL when using the CLI and relevant methods. + +When downloading a model from a URI, the model files will be prefixed with a corresponding adaptation of the URI. + +To reference a model from Hugging Face, you can use the scheme +
+`hf://#` (`#` is optional). + +Here's an example usage of the Hugging Face URI scheme: +``` +hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf +``` + +When using a URI to reference a model, +it's recommended [to add it to your `package.json` file](#cli) to ensure it's downloaded when running `npm install`, +and also resolve it using the [`resolveModelFile`](../api/functions/resolveModelFile.md) method to get the full path of the resolved model file. + +Here's and example usage of the [`resolveModelFile`](../api/functions/resolveModelFile.md) method: +```typescript +import {fileURLToPath} from "url"; +import path from "path"; +import {getLlama, resolveModelFile} from "node-llama-cpp"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const modelsDirectory = path.join(__dirname, "models"); + +const modelPath = await resolveModelFile( + "hf:user/model/model-file.gguf", + modelsDirectory +); + +const llama = await getLlama(); +const model = await llama.loadModel({modelPath}); +``` + +::: tip NOTE +If a corresponding model file is not found in the given directory, the model will automatically be downloaded. + +When a file is being downloaded, the download progress is shown in the console by default. +
+Set the [`cli`](../api/type-aliases/ResolveModelFileOptions#cli) option to `false` to disable this behavior. +::: + ## Downloading Gated Models From Hugging Face {#hf-token} Some models on Hugging Face are "gated", meaning they require a manual consent from you before you can download them. @@ -76,9 +120,10 @@ To download such models, after completing the consent form on the model card, yo * Set an environment variable called `HF_TOKEN` the token * Set the `~/.cache/huggingface/token` file content to the token -Now, using the CLI or the [`createModelDownloader`](../api/functions/createModelDownloader.md) method will automatically use the token to download gated models. +Now, using the CLI, the [`createModelDownloader`](../api/functions/createModelDownloader.md) method, +or the [`resolveModelFile`](../api/functions/resolveModelFile.md) method will automatically use the token to download gated models. -Alternatively, you can use the token in the [`tokens`](../api/type-aliases/ModelDownloaderOptions.md#tokens) option when using [`createModelDownloader`](../api/functions/createModelDownloader.md). +Alternatively, you can use the token in the [`tokens`](../api/type-aliases/ModelDownloaderOptions.md#tokens) option when using [`createModelDownloader`](../api/functions/createModelDownloader.md) or [`resolveModelFile`](../api/functions/resolveModelFile.md). ## Inspecting Remote Models You can inspect the metadata of a remote model without downloading it by either using the [`inspect gguf` command](../cli/inspect/gguf.md) with a URL, diff --git a/docs/guide/index.md b/docs/guide/index.md index e8007022..cfc5c7ec 100644 --- a/docs/guide/index.md +++ b/docs/guide/index.md @@ -51,9 +51,9 @@ npx --no node-llama-cpp inspect gpu ``` ## Getting a Model File -We recommend you to get a GGUF model from either [Michael Radermacher on Hugging Face](https://huggingface.co/mradermacher) or [search HuggingFace directly](https://huggingface.co/models?library=gguf) for a GGUF model. +We recommend getting a GGUF model from either [Michael Radermacher on Hugging Face](https://huggingface.co/mradermacher) or by [searching HuggingFace directly](https://huggingface.co/models?library=gguf) for a GGUF model. -We recommend you to start by getting a small model that doesn't have a lot of parameters just to ensure everything works, so try downloading a `7B`/`8B` parameters model first (search for models with both `7B`/`8B` and `GGUF` in their name). +We recommend starting by getting a small model that doesn't have a lot of parameters just to ensure everything works, so try downloading a `7B`/`8B` parameters model first (search for models with both `7B`/`8B` and `GGUF` in their name). 
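As a rough illustration of how such a model can then be fetched and loaded programmatically, here is a minimal sketch built from pieces that already appear in this change: the `resolveModelFile` example from the downloading-models guide above, and one of the recommended 8B quantization URIs from `recommendedModels.ts`. The pairing of this specific URI with this snippet is illustrative, not part of the patch.

```typescript
import {fileURLToPath} from "url";
import path from "path";
import {getLlama, resolveModelFile} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Resolve (and download if needed) a small recommended model by its `hf:` URI.
// The exact URI is illustrative; any `hf:<user>/<model>/<file>.gguf` URI is handled the same way.
const modelPath = await resolveModelFile(
    "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf",
    path.join(__dirname, "models")
);

const llama = await getLlama();
const model = await llama.loadModel({modelPath});
```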
For improved download speeds, you can use the [`pull`](../cli/pull.md) command to download a model: ```shell diff --git a/scripts/scaffoldElectronExampleForCiBuild.ts b/scripts/scaffoldElectronExampleForCiBuild.ts index 23ae2234..09f3dddf 100644 --- a/scripts/scaffoldElectronExampleForCiBuild.ts +++ b/scripts/scaffoldElectronExampleForCiBuild.ts @@ -40,8 +40,8 @@ await scaffoldProjectTemplate({ directoryPath: resolvedPackageFolderPath, parameters: { [ProjectTemplateParameter.ProjectName]: projectName, - [ProjectTemplateParameter.ModelUrl]: "https://github.com/withcatai/node-llama-cpp", - [ProjectTemplateParameter.ModelFilename]: "model.gguf", + [ProjectTemplateParameter.ModelUriOrUrl]: "https://github.com/withcatai/node-llama-cpp", + [ProjectTemplateParameter.ModelUriOrFilename]: "model.gguf", [ProjectTemplateParameter.CurrentModuleVersion]: packageVersion } }); diff --git a/src/chatWrappers/Llama3_1ChatWrapper.ts b/src/chatWrappers/Llama3_1ChatWrapper.ts index fc94c378..00ffcf3b 100644 --- a/src/chatWrappers/Llama3_1ChatWrapper.ts +++ b/src/chatWrappers/Llama3_1ChatWrapper.ts @@ -36,13 +36,7 @@ export class Llama3_1ChatWrapper extends ChatWrapper { /** * @param options */ - public constructor({ - cuttingKnowledgeDate = new Date("2023-12-01T00:00:00Z"), - todayDate = () => new Date(), - noToolInstructions = false, - - _specialTokensTextForPreamble = false - }: { + public constructor(options: { /** * Set to `null` to disable * @@ -64,6 +58,14 @@ export class Llama3_1ChatWrapper extends ChatWrapper { } = {}) { super(); + const { + cuttingKnowledgeDate = new Date("2023-12-01T00:00:00Z"), + todayDate = () => new Date(), + noToolInstructions = false, + + _specialTokensTextForPreamble = false + } = options; + this.cuttingKnowledgeDate = cuttingKnowledgeDate == null ? null : cuttingKnowledgeDate instanceof Function diff --git a/src/cli/commands/ChatCommand.ts b/src/cli/commands/ChatCommand.ts index a4f71839..05aa1931 100644 --- a/src/cli/commands/ChatCommand.ts +++ b/src/cli/commands/ChatCommand.ts @@ -77,9 +77,9 @@ export const ChatCommand: CommandModule = { return yargs .option("modelPath", { - alias: ["m", "model", "path", "url"], + alias: ["m", "model", "path", "url", "uri"], type: "string", - description: "Model file to use for the chat. Can be a path to a local file or a URL of a model file to download. Leave empty to choose from a list of recommended models" + description: "Model file to use for the chat. Can be a path to a local file or a URI of a model file to download. Leave empty to choose from a list of recommended models" }) .option("header", { alias: ["H"], diff --git a/src/cli/commands/CompleteCommand.ts b/src/cli/commands/CompleteCommand.ts index 6ccadc72..c765150c 100644 --- a/src/cli/commands/CompleteCommand.ts +++ b/src/cli/commands/CompleteCommand.ts @@ -57,9 +57,9 @@ export const CompleteCommand: CommandModule = { builder(yargs) { return yargs .option("modelPath", { - alias: ["m", "model", "path", "url"], + alias: ["m", "model", "path", "url", "uri"], type: "string", - description: "Model file to use for the chat. Can be a path to a local file or a URL of a model file to download. Leave empty to choose from a list of recommended models" + description: "Model file to use for the completion. Can be a path to a local file or a URI of a model file to download. 
Leave empty to choose from a list of recommended models" }) .option("header", { alias: ["H"], diff --git a/src/cli/commands/InfillCommand.ts b/src/cli/commands/InfillCommand.ts index 7e46c7a2..5144c992 100644 --- a/src/cli/commands/InfillCommand.ts +++ b/src/cli/commands/InfillCommand.ts @@ -59,9 +59,9 @@ export const InfillCommand: CommandModule = { builder(yargs) { return yargs .option("modelPath", { - alias: ["m", "model", "path", "url"], + alias: ["m", "model", "path", "url", "uri"], type: "string", - description: "Model file to use for the chat. Can be a path to a local file or a URL of a model file to download. Leave empty to choose from a list of recommended models" + description: "Model file to use for the infill. Can be a path to a local file or a URI of a model file to download. Leave empty to choose from a list of recommended models" }) .option("header", { alias: ["H"], diff --git a/src/cli/commands/InitCommand.ts b/src/cli/commands/InitCommand.ts index 2d9e4f38..c73ea1ad 100644 --- a/src/cli/commands/InitCommand.ts +++ b/src/cli/commands/InitCommand.ts @@ -6,7 +6,6 @@ import logSymbols from "log-symbols"; import validateNpmPackageName from "validate-npm-package-name"; import fs from "fs-extra"; import {consolePromptQuestion} from "../utils/consolePromptQuestion.js"; -import {isUrl} from "../../utils/isUrl.js"; import {basicChooseFromListConsoleInteraction} from "../utils/basicChooseFromListConsoleInteraction.js"; import {splitAnsiToLines} from "../utils/splitAnsiToLines.js"; import {arrowChar} from "../../consts.js"; @@ -21,6 +20,7 @@ import {ProjectTemplateOption, projectTemplates} from "../projectTemplates.js"; import {getReadablePath} from "../utils/getReadablePath.js"; import {createModelDownloader} from "../../utils/createModelDownloader.js"; import {withCliCommandDescriptionDocsUrl} from "../utils/withCliCommandDescriptionDocsUrl.js"; +import {resolveModelDestination} from "../../utils/resolveModelDestination.js"; type InitCommand = { name?: string, @@ -93,7 +93,7 @@ export async function InitCommandHandler({name, template, gpu}: InitCommand) { logLevel: LlamaLogLevel.error }); - const modelUrl = await interactivelyAskForModel({ + const modelUri = await interactivelyAskForModel({ llama, allowLocalModels: false, downloadIntent: false @@ -113,29 +113,53 @@ export async function InitCommandHandler({name, template, gpu}: InitCommand) { await fs.ensureDir(targetDirectory); - const modelDownloader = await createModelDownloader({ - modelUrl, - showCliProgress: false, - deleteTempFileOnCancel: false - }); - const modelEntrypointFilename = modelDownloader.entrypointFilename; + async function resolveModelInfo() { + const resolvedModelDestination = resolveModelDestination(modelUri); + + if (resolvedModelDestination.type === "uri") + return { + modelUriOrUrl: resolvedModelDestination.uri, + modelUriOrFilename: resolvedModelDestination.uri, + cancelDownloader: async () => void 0 + }; + + if (resolvedModelDestination.type === "file") + throw new Error("Unexpected file model destination"); + + const modelDownloader = await createModelDownloader({ + modelUri: resolvedModelDestination.url, + showCliProgress: false, + deleteTempFileOnCancel: false + }); + const modelEntrypointFilename = modelDownloader.entrypointFilename; + + return { + modelUriOrUrl: resolvedModelDestination.url, + modelUriOrFilename: modelEntrypointFilename, + async cancelDownloader() { + try { + await modelDownloader.cancel(); + } catch (err) { + // do nothing + } + } + }; + } + + const {modelUriOrFilename, 
modelUriOrUrl, cancelDownloader} = await resolveModelInfo(); await scaffoldProjectTemplate({ template, directoryPath: targetDirectory, parameters: { [ProjectTemplateParameter.ProjectName]: projectName, - [ProjectTemplateParameter.ModelUrl]: modelUrl, - [ProjectTemplateParameter.ModelFilename]: modelEntrypointFilename, + [ProjectTemplateParameter.ModelUriOrUrl]: modelUriOrUrl, + [ProjectTemplateParameter.ModelUriOrFilename]: modelUriOrFilename, [ProjectTemplateParameter.CurrentModuleVersion]: await getModuleVersion() } }); - try { - await modelDownloader.cancel(); - } catch (err) { - // do nothing - } + await cancelDownloader(); await new Promise((resolve) => setTimeout(resolve, Math.max(0, minScaffoldTime - (Date.now() - startTime)))); }); @@ -213,10 +237,7 @@ async function askForProjectName(currentDirectory: string) { if (item == null) return ""; - if (isUrl(item, false)) - return logSymbols.success + " Entered project name " + chalk.blue(item); - else - return logSymbols.success + " Entered project name " + chalk.blue(item); + return logSymbols.success + " Entered project name " + chalk.blue(item); } }); diff --git a/src/cli/commands/PullCommand.ts b/src/cli/commands/PullCommand.ts index 79eee55f..567e3c6b 100644 --- a/src/cli/commands/PullCommand.ts +++ b/src/cli/commands/PullCommand.ts @@ -34,13 +34,13 @@ export const PullCommand: CommandModule = { return yargs .option("urls", { type: "string", - alias: ["url"], + alias: ["url", "uris", "uri"], array: true, description: [ - "A `.gguf` model URL to pull.", - !isInDocumentationMode && "Automatically handles split and binary-split models files, so only pass the URL to the first file of a model.", + "A `.gguf` model URI to pull.", + !isInDocumentationMode && "Automatically handles split and binary-split models files, so only pass the URI to the first file of a model.", !isInDocumentationMode && "If a file already exists and its size matches the expected size, it will not be downloaded again unless the `--override` flag is used.", - "Pass multiple URLs to download multiple models at once." + "Pass multiple URIs to download multiple models at once." ].filter(Boolean).join( isInDocumentationMode ? 
"\n" @@ -104,13 +104,13 @@ export const PullCommand: CommandModule = { const headers = resolveHeaderFlag(headerArg); if (urls.length === 0) - throw new Error("At least one URL must be provided"); + throw new Error("At least one URI must be provided"); else if (urls.length > 1 && filename != null) - throw new Error("The `--filename` flag can only be used when a single URL is passed"); + throw new Error("The `--filename` flag can only be used when a single URI is passed"); if (urls.length === 1) { const downloader = await createModelDownloader({ - modelUrl: urls[0]!, + modelUri: urls[0]!, dirPath: directory, headers, showCliProgress: !noProgress, @@ -155,14 +155,13 @@ export const PullCommand: CommandModule = { console.info(`Downloaded to ${chalk.yellow(getReadablePath(downloader.entrypointFilePath))}`); } else { const downloader = await combineModelDownloaders( - urls.map((url) => createModelDownloader({ - modelUrl: url, + urls.map((uri) => createModelDownloader({ + modelUri: uri, dirPath: directory, headers, showCliProgress: false, deleteTempFileOnCancel: noTempFile, - skipExisting: !override, - fileName: filename || undefined + skipExisting: !override })), { showCliProgress: !noProgress, diff --git a/src/cli/commands/inspect/commands/InspectEstimateCommand.ts b/src/cli/commands/inspect/commands/InspectEstimateCommand.ts index d5fc8ab4..dc5a2f4c 100644 --- a/src/cli/commands/inspect/commands/InspectEstimateCommand.ts +++ b/src/cli/commands/inspect/commands/InspectEstimateCommand.ts @@ -1,12 +1,8 @@ -import path from "path"; import {CommandModule} from "yargs"; import chalk from "chalk"; import bytes from "bytes"; import {readGgufFileInfo} from "../../../../gguf/readGgufFileInfo.js"; -import {normalizeGgufDownloadUrl} from "../../../../gguf/utils/normalizeGgufDownloadUrl.js"; -import {isUrl} from "../../../../utils/isUrl.js"; import {resolveHeaderFlag} from "../../../utils/resolveHeaderFlag.js"; -import {getReadablePath} from "../../../utils/getReadablePath.js"; import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js"; import {documentationPageUrls} from "../../../../config.js"; import {printInfoLine} from "../../../utils/printInfoLine.js"; @@ -22,6 +18,8 @@ import {Llama} from "../../../../bindings/Llama.js"; import {getGgufFileTypeName} from "../../../../gguf/utils/getGgufFileTypeName.js"; import {getPrettyBuildGpuName} from "../../../../bindings/consts.js"; import withOra from "../../../../utils/withOra.js"; +import {resolveModelDestination} from "../../../../utils/resolveModelDestination.js"; +import {printModelDestination} from "../../../utils/printModelDestination.js"; type InspectEstimateCommand = { modelPath: string, @@ -41,10 +39,10 @@ export const InspectEstimateCommand: CommandModule { - return await readGgufFileInfo(ggufPath, { - fetchHeaders: isPathUrl ? headers : undefined + return await readGgufFileInfo(resolvedGgufPath, { + fetchHeaders: resolvedModelDestination.type === "file" + ? 
undefined + : headers }); }); const ggufInsights = await GgufInsights.from(ggufFileInfo, llama); diff --git a/src/cli/commands/inspect/commands/InspectGgufCommand.ts b/src/cli/commands/inspect/commands/InspectGgufCommand.ts index 5e7329b7..fa1ca7e3 100644 --- a/src/cli/commands/inspect/commands/InspectGgufCommand.ts +++ b/src/cli/commands/inspect/commands/InspectGgufCommand.ts @@ -7,13 +7,12 @@ import fs from "fs-extra"; import {readGgufFileInfo} from "../../../../gguf/readGgufFileInfo.js"; import {prettyPrintObject, PrettyPrintObjectOptions} from "../../../../utils/prettyPrintObject.js"; import {getGgufFileTypeName} from "../../../../gguf/utils/getGgufFileTypeName.js"; -import {normalizeGgufDownloadUrl} from "../../../../gguf/utils/normalizeGgufDownloadUrl.js"; -import {isUrl} from "../../../../utils/isUrl.js"; import {resolveHeaderFlag} from "../../../utils/resolveHeaderFlag.js"; -import {getReadablePath} from "../../../utils/getReadablePath.js"; import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js"; import {documentationPageUrls} from "../../../../config.js"; import withOra from "../../../../utils/withOra.js"; +import {resolveModelDestination} from "../../../../utils/resolveModelDestination.js"; +import {printModelDestination} from "../../../utils/printModelDestination.js"; type InspectGgufCommand = { modelPath: string, @@ -34,10 +33,10 @@ export const InspectGgufCommand: CommandModule = { builder(yargs) { return yargs .option("modelPath", { - alias: ["m", "model", "path", "url"], + alias: ["m", "model", "path", "url", "uri"], type: "string", demandOption: true, - description: "The path or URL of the GGUF file to inspect. If a URL is provided, the metadata will be read from the remote file without downloading the entire file.", + description: "The path or URI of the GGUF file to inspect. If a URI is provided, the metadata will be read from the remote file without downloading the entire file.", group: "Required:" }) .option("header", { @@ -83,23 +82,21 @@ export const InspectGgufCommand: CommandModule = { async handler({ modelPath: ggufPath, header: headerArg, noSplice, fullTensorInfo, fullMetadataArrays, plainJson, outputToJsonFile }: InspectGgufCommand) { - const isPathUrl = isUrl(ggufPath); - const resolvedGgufPath = isPathUrl - ? normalizeGgufDownloadUrl(ggufPath) - : path.resolve(ggufPath); + const resolvedModelDestination = resolveModelDestination(ggufPath); + const resolvedGgufPath = resolvedModelDestination.type == "file" + ? resolvedModelDestination.path + : resolvedModelDestination.url; const headers = resolveHeaderFlag(headerArg); - if (!plainJson) { - if (isPathUrl) - console.info(`${chalk.yellow("URL:")} ${resolvedGgufPath}`); - else - console.info(`${chalk.yellow("File:")} ${getReadablePath(resolvedGgufPath)}`); - } + if (!plainJson) + printModelDestination(resolvedModelDestination); const parsedMetadata = plainJson - ? await readGgufFileInfo(ggufPath, { - fetchHeaders: isPathUrl ? headers : undefined, + ? await readGgufFileInfo(resolvedGgufPath, { + fetchHeaders: resolvedModelDestination.type === "file" + ? undefined + : headers, spliceSplitFiles: !noSplice }) : await withOra({ @@ -108,8 +105,10 @@ export const InspectGgufCommand: CommandModule = { fail: chalk.blue("Failed to read model metadata"), noSuccessLiveStatus: true }, async () => { - return await readGgufFileInfo(ggufPath, { - fetchHeaders: isPathUrl ? 
headers : undefined, + return await readGgufFileInfo(resolvedGgufPath, { + fetchHeaders: resolvedModelDestination.type === "file" + ? undefined + : headers, spliceSplitFiles: !noSplice }); }); diff --git a/src/cli/commands/inspect/commands/InspectMeasureCommand.ts b/src/cli/commands/inspect/commands/InspectMeasureCommand.ts index eb3a4279..fea3c525 100644 --- a/src/cli/commands/inspect/commands/InspectMeasureCommand.ts +++ b/src/cli/commands/inspect/commands/InspectMeasureCommand.ts @@ -44,9 +44,9 @@ export const InspectMeasureCommand: CommandModule builder(yargs) { return yargs .option("modelPath", { - alias: ["m", "model", "path", "url"], + alias: ["m", "model", "path", "url", "uri"], type: "string", - description: "Model file to use for the chat. Can be a path to a local file or a URL of a model file to download. Leave empty to choose from a list of recommended models" + description: "Model file to use for the measurements. Can be a path to a local file or a URI of a model file to download. Leave empty to choose from a list of recommended models" }) .option("header", { alias: ["H"], diff --git a/src/cli/recommendedModels.ts b/src/cli/recommendedModels.ts index 538fb193..67d9d038 100644 --- a/src/cli/recommendedModels.ts +++ b/src/cli/recommendedModels.ts @@ -6,25 +6,11 @@ export const recommendedModels: ModelRecommendation[] = [{ description: "Llama 3.1 model was created by Meta and is optimized for an assistant-like chat use cases, with support for function calling.\n" + "This is the 8 billion parameters version of the model.", - fileOptions: [{ - huggingFace: { - model: "mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF", - branch: "main", - file: "Meta-Llama-3.1-8B-Instruct.Q8_0.gguf" - } - }, { - huggingFace: { - model: "mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF", - branch: "main", - file: "Meta-Llama-3.1-8B-Instruct.Q6_K.gguf" - } - }, { - huggingFace: { - model: "mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF", - branch: "main", - file: "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q8_0.gguf", + "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q6_K.gguf", + "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf" + ] }, { name: "Llama 3.1 70B", abilities: ["chat", "complete", "functionCalling"], @@ -32,31 +18,12 @@ export const recommendedModels: ModelRecommendation[] = [{ "This is the 70 billion parameters version of the model. 
" + "You need a GPU with a lot of VRAM to use this version.", - fileOptions: [{ - huggingFace: { - model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF", - branch: "main", - file: "Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2" - } - }, { - huggingFace: { - model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF", - branch: "main", - file: "Meta-Llama-3.1-70B-Instruct.Q6_K.gguf.part1of2" - } - }, { - huggingFace: { - model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF", - branch: "main", - file: "Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf" - } - }, { - huggingFace: { - model: "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF", - branch: "main", - file: "Meta-Llama-3.1-70B-Instruct.Q4_K_S.gguf" - } - }] + fileOptions: [ + "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2", + "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q6_K.gguf.part1of2", + "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf", + "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_S.gguf" + ] }, { name: "Llama 3.1 405B", abilities: ["chat", "complete", "functionCalling"], @@ -64,19 +31,10 @@ export const recommendedModels: ModelRecommendation[] = [{ "This is the 405 billion parameters version of the model, and its capabilities are comparable and sometimes even surpass GPT-4o and Claude 3.5 Sonnet.\n" + "You need a GPU with a lot of VRAM to use this version of Llama 3.1.", - fileOptions: [{ - huggingFace: { - model: "mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF", - branch: "main", - file: "Meta-Llama-3.1-405B-Instruct.Q3_K_L.gguf.part1of5" - } - }, { - huggingFace: { - model: "mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF", - branch: "main", - file: "Meta-Llama-3.1-405B-Instruct.Q3_K_M.gguf.part1of4" - } - }] + fileOptions: [ + "hf:mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF/Meta-Llama-3.1-405B-Instruct.Q3_K_L.gguf.part1of5", + "hf:mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF/Meta-Llama-3.1-405B-Instruct.Q3_K_M.gguf.part1of4" + ] }, { name: "Mistral Nemo 12B", abilities: ["chat", "complete", "functionCalling"], @@ -84,50 +42,22 @@ export const recommendedModels: ModelRecommendation[] = [{ "It was trained jointly by Mistral AI and NVIDIA.\n" + "This is a 12 billion parameters model.", - fileOptions: [{ - huggingFace: { - model: "mradermacher/Mistral-Nemo-Instruct-2407-GGUF", - branch: "main", - file: "Mistral-Nemo-Instruct-2407.Q8_0.gguf" - } - }, { - huggingFace: { - model: "mradermacher/Mistral-Nemo-Instruct-2407-GGUF", - branch: "main", - file: "Mistral-Nemo-Instruct-2407.Q6_K.gguf" - } - }, { - huggingFace: { - model: "mradermacher/Mistral-Nemo-Instruct-2407-GGUF", - branch: "main", - file: "Mistral-Nemo-Instruct-2407.Q4_K_M.gguf" - } - }, { - huggingFace: { - model: "mradermacher/Mistral-Nemo-Instruct-2407-GGUF", - branch: "main", - file: "Mistral-Nemo-Instruct-2407.Q4_K_S.gguf" - } - }] + fileOptions: [ + "hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407.Q8_0.gguf", + "hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407.Q6_K.gguf", + "hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407.Q4_K_M.gguf", + "hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407.Q4_K_S.gguf" + ] }, { name: "Phi 3 3.8B", abilities: ["chat", "complete", "functionCalling"], description: "Phi 3 model was created by Microsoft and is optimized for strong reasoning (especially math and logic).\n" + "This is 
the smallversion of the model.", - fileOptions: [{ - huggingFace: { - model: "bartowski/Phi-3.1-mini-4k-instruct-GGUF", - branch: "main", - file: "Phi-3.1-mini-4k-instruct-Q8_0.gguf" - } - }, { - huggingFace: { - model: "bartowski/Phi-3.1-mini-4k-instruct-GGUF", - branch: "main", - file: "Phi-3.1-mini-4k-instruct-Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:bartowski/Phi-3.1-mini-4k-instruct-GGUF/Phi-3.1-mini-4k-instruct-Q8_0.gguf", + "hf:bartowski/Phi-3.1-mini-4k-instruct-GGUF/Phi-3.1-mini-4k-instruct-Q4_K_M.gguf" + ] }, { name: "OLMoE 1B 7B MoE", abilities: ["chat"], @@ -136,37 +66,13 @@ export const recommendedModels: ModelRecommendation[] = [{ "This model includes 64 expert models, with a total of 7 billion parameters.\n" + "This model generates output extremely fast.", - fileOptions: [{ - huggingFace: { - model: "allenai/OLMoE-1B-7B-0924-Instruct-GGUF", - branch: "main", - file: "olmoe-1b-7b-0924-instruct-q8_0.gguf" - } - }, { - huggingFace: { - model: "allenai/OLMoE-1B-7B-0924-Instruct-GGUF", - branch: "main", - file: "olmoe-1b-7b-0924-instruct-q6_k.gguf" - } - }, { - huggingFace: { - model: "allenai/OLMoE-1B-7B-0924-Instruct-GGUF", - branch: "main", - file: "olmoe-1b-7b-0924-instruct-q5_k_m.gguf" - } - }, { - huggingFace: { - model: "allenai/OLMoE-1B-7B-0924-Instruct-GGUF", - branch: "main", - file: "olmoe-1b-7b-0924-instruct-q4_k_s.gguf" - } - }, { - huggingFace: { - model: "allenai/OLMoE-1B-7B-0924-Instruct-GGUF", - branch: "main", - file: "olmoe-1b-7b-0924-instruct-q4_k_m.gguf" - } - }] + fileOptions: [ + "hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf", + "hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q6_k.gguf", + "hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q5_k_m.gguf", + "hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q4_k_s.gguf", + "hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q4_k_m.gguf" + ] }, { name: "Mixtral 8x7B MoE", abilities: ["chat", "complete"], @@ -174,38 +80,20 @@ export const recommendedModels: ModelRecommendation[] = [{ "Mixtures of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to the improve the overall performance on complex tasks.\n" + "This model includes 8 expert models, each with 7 billion parameters.", - fileOptions: [{ - huggingFace: { - model: "TheBloke/Mixtral-8x7B-v0.1-GGUF", - branch: "main", - file: "mixtral-8x7b-v0.1.Q5_K_M.gguf" - } - }, { - huggingFace: { - model: "TheBloke/Mixtral-8x7B-v0.1-GGUF", - branch: "main", - file: "mixtral-8x7b-v0.1.Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q5_K_M.gguf", + "hf:TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q4_K_M.gguf" + ] }, { name: "Mistral 7B Instruct v0.2", abilities: ["chat", "complete"], description: "Mistral models were created by Mistal AI and are general purpose models.\n" + "This is the 7 billion parameters version of the model.", - fileOptions: [{ - huggingFace: { - model: "TheBloke/Mistral-7B-Instruct-v0.2-GGUF", - branch: "main", - file: "mistral-7b-instruct-v0.2.Q5_K_M.gguf" - } - }, { - huggingFace: { - model: "TheBloke/Mistral-7B-Instruct-v0.2-GGUF", - branch: "main", - file: "mistral-7b-instruct-v0.2.Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q5_K_M.gguf", + "hf:TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf" + ] }, { name: "Dolphin 2.5 
Mixtral 8x7B MoE", abilities: ["chat", "complete"], @@ -213,19 +101,10 @@ export const recommendedModels: ModelRecommendation[] = [{ "See the Mixtral model above for more information about Mixtral models.\n" + "This model includes 8 expert models, each with 7 billion parameters.", - fileOptions: [{ - huggingFace: { - model: "TheBloke/dolphin-2.5-mixtral-8x7b-GGUF", - branch: "main", - file: "dolphin-2.5-mixtral-8x7b.Q5_K_M.gguf" - } - }, { - huggingFace: { - model: "TheBloke/dolphin-2.5-mixtral-8x7b-GGUF", - branch: "main", - file: "dolphin-2.5-mixtral-8x7b.Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/dolphin-2.5-mixtral-8x7b.Q5_K_M.gguf", + "hf:TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/dolphin-2.5-mixtral-8x7b.Q4_K_M.gguf" + ] }, { name: "Gemma 2 9B", abilities: ["chat", "complete"], @@ -233,49 +112,15 @@ export const recommendedModels: ModelRecommendation[] = [{ "including question answering, summarization, and reasoning, with a focus on responsible responses.\n" + "This is the 9 billion parameters version of the model.", - fileOptions: [{ - huggingFace: { - model: "bartowski/gemma-2-9b-it-GGUF", - branch: "main", - file: "gemma-2-9b-it-Q6_K_L.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-9b-it-GGUF", - branch: "main", - file: "gemma-2-9b-it-Q6_K.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-9b-it-GGUF", - branch: "main", - file: "gemma-2-9b-it-Q5_K_L.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-9b-it-GGUF", - branch: "main", - file: "gemma-2-9b-it-Q5_K_M.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-9b-it-GGUF", - branch: "main", - file: "gemma-2-9b-it-Q5_K_S.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-9b-it-GGUF", - branch: "main", - file: "gemma-2-9b-it-Q4_K_L.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-9b-it-GGUF", - branch: "main", - file: "gemma-2-9b-it-Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q6_K_L.gguf", + "hf:bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q6_K.gguf", + "hf:bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q5_K_L.gguf", + "hf:bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q5_K_M.gguf", + "hf:bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q5_K_S.gguf", + "hf:bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_L.gguf", + "hf:bartowski/gemma-2-9b-it-GGUF/gemma-2-9b-it-Q4_K_M.gguf" + ] }, { name: "Gemma 2 2B", abilities: ["chat", "complete"], @@ -283,37 +128,13 @@ export const recommendedModels: ModelRecommendation[] = [{ "including question answering, summarization, and reasoning, with a focus on responsible responses.\n" + "This is the 2 billion parameters version of the model and is significantly less powerful than the 9B version.", - fileOptions: [{ - huggingFace: { - model: "bartowski/gemma-2-2b-it-GGUF", - branch: "main", - file: "gemma-2-2b-it-Q6_K_L.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-2b-it-GGUF", - branch: "main", - file: "gemma-2-2b-it-Q6_K.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-2b-it-GGUF", - branch: "main", - file: "gemma-2-2b-it-Q5_K_M.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-2b-it-GGUF", - branch: "main", - file: "gemma-2-2b-it-Q5_K_S.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-2b-it-GGUF", - branch: "main", - file: "gemma-2-2b-it-Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:bartowski/gemma-2-2b-it-GGUF/gemma-2-2b-it-Q6_K_L.gguf", + "hf:bartowski/gemma-2-2b-it-GGUF/gemma-2-2b-it-Q6_K.gguf", + 
"hf:bartowski/gemma-2-2b-it-GGUF/gemma-2-2b-it-Q5_K_M.gguf", + "hf:bartowski/gemma-2-2b-it-GGUF/gemma-2-2b-it-Q5_K_S.gguf", + "hf:bartowski/gemma-2-2b-it-GGUF/gemma-2-2b-it-Q4_K_M.gguf" + ] }, { name: "Gemma 2 27B", abilities: ["chat", "complete"], @@ -322,106 +143,45 @@ export const recommendedModels: ModelRecommendation[] = [{ "This is the 27 billion parameters version of the model.\n" + "Since the model is relatively big, it may not run well on your machine", - fileOptions: [{ - huggingFace: { - model: "bartowski/gemma-2-27b-it-GGUF", - branch: "main", - file: "gemma-2-27b-it-Q6_K_L.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-27b-it-GGUF", - branch: "main", - file: "gemma-2-27b-it-Q6_K.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-27b-it-GGUF", - branch: "main", - file: "gemma-2-27b-it-Q5_K_L.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-27b-it-GGUF", - branch: "main", - file: "gemma-2-27b-it-Q5_K_M.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-27b-it-GGUF", - branch: "main", - file: "gemma-2-27b-it-Q5_K_S.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-27b-it-GGUF", - branch: "main", - file: "gemma-2-27b-it-Q4_K_L.gguf" - } - }, { - huggingFace: { - model: "bartowski/gemma-2-27b-it-GGUF", - branch: "main", - file: "gemma-2-27b-it-Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q6_K_L.gguf", + "hf:bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q6_K.gguf", + "hf:bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q5_K_L.gguf", + "hf:bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q5_K_M.gguf", + "hf:bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q5_K_S.gguf", + "hf:bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q4_K_L.gguf", + "hf:bartowski/gemma-2-27b-it-GGUF/gemma-2-27b-it-Q4_K_M.gguf" + ] }, { name: "Orca 2 13B", abilities: ["chat", "complete"], description: "Orca 2 model was created by Microsoft and is optimized for reasoning over given data, reading comprehensions, math problem solving and text summarization.\n" + "This is the 13 billion parameters version of the model.", - fileOptions: [{ - huggingFace: { - model: "TheBloke/Orca-2-13B-GGUF", - branch: "main", - file: "orca-2-13b.Q5_K_M.gguf" - } - }, { - huggingFace: { - model: "TheBloke/Orca-2-13B-GGUF", - branch: "main", - file: "orca-2-13b.Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:TheBloke/Orca-2-13B-GGUF/orca-2-13b.Q5_K_M.gguf", + "hf:TheBloke/Orca-2-13B-GGUF/orca-2-13b.Q4_K_M.gguf" + ] }, { name: "Code Llama 7B", abilities: ["chat", "complete", "infill"], description: "Code Llama model was created by Meta based on Llama 2 and is optimized for coding tasks.\n" + "This is the 7 billion parameters version of the model.", - fileOptions: [{ - huggingFace: { - model: "TheBloke/CodeLlama-7B-GGUF", - branch: "main", - file: "codellama-7b.Q5_K_M.gguf" - } - }, { - huggingFace: { - model: "TheBloke/CodeLlama-7B-GGUF", - branch: "main", - file: "codellama-7b.Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q5_K_M.gguf", + "hf:TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_K_M.gguf" + ] }, { name: "Code Llama 13B", abilities: ["chat", "complete", "infill"], description: "Code Llama model was created by Meta based on Llama 2 and is optimized for coding tasks.\n" + "This is the 13 billion parameters version of the model.", - fileOptions: [{ - huggingFace: { - model: "TheBloke/CodeLlama-13B-GGUF", - branch: "main", - file: "codellama-13b.Q5_K_M.gguf" - } - }, { - huggingFace: { - model: 
"TheBloke/CodeLlama-13B-GGUF", - branch: "main", - file: "codellama-13b.Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:TheBloke/CodeLlama-13B-GGUF/codellama-13b.Q5_K_M.gguf", + "hf:TheBloke/CodeLlama-13B-GGUF/codellama-13b.Q4_K_M.gguf" + ] }, { name: "Code Llama 34B", abilities: ["chat", "complete", "infill"], @@ -429,19 +189,10 @@ export const recommendedModels: ModelRecommendation[] = [{ "This is the 34 billion parameters version of the model.\n" + "You need a GPU with handful of VRAM to use this version.", - fileOptions: [{ - huggingFace: { - model: "TheBloke/CodeLlama-34B-GGUF", - branch: "main", - file: "codellama-34b.Q5_K_M.gguf" - } - }, { - huggingFace: { - model: "TheBloke/CodeLlama-34B-GGUF", - branch: "main", - file: "codellama-34b.Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:TheBloke/CodeLlama-34B-GGUF/codellama-34b.Q5_K_M.gguf", + "hf:TheBloke/CodeLlama-34B-GGUF/codellama-34b.Q4_K_M.gguf" + ] }, { name: "CodeGemma 2B", abilities: ["code", "complete", "infill"], @@ -450,37 +201,13 @@ export const recommendedModels: ModelRecommendation[] = [{ "This model is not suited for chat.\n" + "This is the 2 billion parameters version of the model.\n", - fileOptions: [{ - huggingFace: { - model: "bartowski/codegemma-2b-GGUF", - branch: "main", - file: "codegemma-2b-Q8_0.gguf" - } - }, { - huggingFace: { - model: "bartowski/codegemma-2b-GGUF", - branch: "main", - file: "codegemma-2b-Q6_K.gguf" - } - }, { - huggingFace: { - model: "bartowski/codegemma-2b-GGUF", - branch: "main", - file: "codegemma-2b-Q5_K_M.gguf" - } - }, { - huggingFace: { - model: "bartowski/codegemma-2b-GGUF", - branch: "main", - file: "codegemma-2b-Q5_K_S.gguf" - } - }, { - huggingFace: { - model: "bartowski/codegemma-2b-GGUF", - branch: "main", - file: "codegemma-2b-Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:bartowski/codegemma-2b-GGUF/codegemma-2b-Q8_0.gguf", + "hf:bartowski/codegemma-2b-GGUF/codegemma-2b-Q6_K.gguf", + "hf:bartowski/codegemma-2b-GGUF/codegemma-2b-Q5_K_M.gguf", + "hf:bartowski/codegemma-2b-GGUF/codegemma-2b-Q5_K_S.gguf", + "hf:bartowski/codegemma-2b-GGUF/codegemma-2b-Q4_K_M.gguf" + ] }, { name: "CodeGemma 7B", abilities: ["code", "complete", "infill"], @@ -489,47 +216,19 @@ export const recommendedModels: ModelRecommendation[] = [{ "This model is not suited for chat.\n" + "This is the 7 billion parameters version of the model.\n", - fileOptions: [{ - huggingFace: { - model: "bartowski/codegemma-1.1-7b-it-GGUF", - branch: "main", - file: "codegemma-1.1-7b-it-Q6_K.gguf" - } - }, { - huggingFace: { - model: "bartowski/codegemma-1.1-7b-it-GGUF", - branch: "main", - file: "codegemma-1.1-7b-it-Q5_K_M.gguf" - } - }, { - huggingFace: { - model: "bartowski/codegemma-1.1-7b-it-GGUF", - branch: "main", - file: "codegemma-1.1-7b-it-Q5_K_S.gguf" - } - }, { - huggingFace: { - model: "bartowski/codegemma-1.1-7b-it-GGUF", - branch: "main", - file: "codegemma-1.1-7b-it-Q4_K_M.gguf" - } - }] + fileOptions: [ + "hf:bartowski/codegemma-1.1-7b-it-GGUF/codegemma-1.1-7b-it-Q6_K.gguf", + "hf:bartowski/codegemma-1.1-7b-it-GGUF/codegemma-1.1-7b-it-Q5_K_M.gguf", + "hf:bartowski/codegemma-1.1-7b-it-GGUF/codegemma-1.1-7b-it-Q5_K_S.gguf", + "hf:bartowski/codegemma-1.1-7b-it-GGUF/codegemma-1.1-7b-it-Q4_K_M.gguf" + ] }, { name: "Stable Code Instruct 3B", abilities: ["chat", "complete", "infill"], description: "Stable Code models were created by Stability AI and are optimized for code completion.", - fileOptions: [{ - huggingFace: { - model: "stabilityai/stable-code-instruct-3b", - branch: "main", - file: "stable-code-3b-q5_k_m.gguf" - 
} - }, { - huggingFace: { - model: "stabilityai/stable-code-instruct-3b", - branch: "main", - file: "stable-code-3b-q4_k_m.gguf" - } - }] + fileOptions: [ + "hf:stabilityai/stable-code-instruct-3b/stable-code-3b-q5_k_m.gguf", + "hf:stabilityai/stable-code-instruct-3b/stable-code-3b-q4_k_m.gguf" + ] }]; diff --git a/src/cli/utils/interactivelyAskForModel.ts b/src/cli/utils/interactivelyAskForModel.ts index d4cec8cc..7dc264a2 100644 --- a/src/cli/utils/interactivelyAskForModel.ts +++ b/src/cli/utils/interactivelyAskForModel.ts @@ -15,6 +15,7 @@ import {readGgufFileInfo} from "../../gguf/readGgufFileInfo.js"; import {getPrettyBuildGpuName} from "../../bindings/consts.js"; import {GgufInsightsConfigurationResolver} from "../../gguf/insights/GgufInsightsConfigurationResolver.js"; import {isUrl} from "../../utils/isUrl.js"; +import {isModelUri, parseModelUri} from "../../utils/parseModelUri.js"; import {resolveModelRecommendationFileOptions} from "./resolveModelRecommendationFileOptions.js"; import {getReadablePath} from "./getReadablePath.js"; import {basicChooseFromListConsoleInteraction} from "./basicChooseFromListConsoleInteraction.js"; @@ -36,13 +37,13 @@ type ModelOption = { type: "recommendedModel", title: string | (() => string), description?: string, - potentialUrls: string[], - selectedUrl?: { - url: string, + potentialUris: string[], + selectedUri?: { + uri: string, ggufInsights: GgufInsights, compatibilityScore: Awaited> }, - urlSelectionLoadingState?: "done" | "loading" + uriSelectionLoadingState?: "done" | "loading" } | { type: "separator", text: string | (() => string) @@ -154,13 +155,13 @@ export async function interactivelyAskForModel({ const {recommendedModels} = await import("../recommendedModels.js"); for (const recommendedModel of recommendedModels) { - const potentialUrls = resolveModelRecommendationFileOptions(recommendedModel); + const potentialUris = resolveModelRecommendationFileOptions(recommendedModel); - if (potentialUrls.length > 0) + if (potentialUris.length > 0) recommendedModelOptions.push({ type: "recommendedModel", title: recommendedModel.name, - potentialUrls, + potentialUris, description: recommendedModel.description }); } @@ -173,8 +174,8 @@ export async function interactivelyAskForModel({ { type: "action", text: allowLocalModels - ? "Enter a model URL or file path..." - : "Enter a model URL...", + ? "Enter a model URI or file path..." 
+ : "Enter a model URI...", key: "getPath" }, ...( @@ -283,7 +284,7 @@ export async function interactivelyAskForModel({ " ".repeat(leftPad) + chalk.bold.gray("Model description") + "\n" + lines.map((line) => (" ".repeat(leftPad) + line)) .join("\n") + "\n" + - splitAnsiToLines(renderRecommendedModelTechnicalInfo(item.selectedUrl, maxWidth, canUseGpu), maxWidth) + splitAnsiToLines(renderRecommendedModelTechnicalInfo(item.selectedUri, maxWidth, canUseGpu), maxWidth) .map((line) => (" ".repeat(leftPad) + line)) .join("\n"); }, @@ -296,7 +297,7 @@ export async function interactivelyAskForModel({ }, canSelectItem(item) { if (item.type === "recommendedModel") - return item.selectedUrl != null; + return item.selectedUri != null; return item.type === "localModel" || item.type === "action"; }, @@ -330,17 +331,17 @@ export async function interactivelyAskForModel({ continue; else if (selectedItem.type === "localModel") return selectedItem.path; - else if (selectedItem.type === "recommendedModel" && selectedItem.selectedUrl != null) - return selectedItem.selectedUrl.url; + else if (selectedItem.type === "recommendedModel" && selectedItem.selectedUri != null) + return selectedItem.selectedUri.uri; else if (selectedItem.type === "action") { if (selectedItem.key === "getPath") { initialFocusIndex = 0; - const selectedModelUrlOrPath = await askForModelUrlOrPath(allowLocalModels); + const selectedModelUriOrPath = await askForModelUriOrPath(allowLocalModels); - if (selectedModelUrlOrPath == null) + if (selectedModelUriOrPath == null) continue; - return selectedModelUrlOrPath; + return selectedModelUriOrPath; } } } @@ -349,11 +350,11 @@ export async function interactivelyAskForModel({ } } -async function askForModelUrlOrPath(allowLocalModels: boolean): Promise { +async function askForModelUriOrPath(allowLocalModels: boolean): Promise { return await consolePromptQuestion( allowLocalModels - ? chalk.bold("Enter a model URL or file path: ") - : chalk.bold("Enter a model URL: "), + ? 
chalk.bold("Enter a model URI or file path: ") + : chalk.bold("Enter a model URI: "), { exitOnCtrlC: false, async validate(input) { @@ -365,8 +366,19 @@ async function askForModelUrlOrPath(allowLocalModels: boolean): Promise bestScore) { bestScore = compatibilityScore.compatibilityScore; - bestScoreSelectedUrl = { - url: potentialUrl, + bestScoreSelectedUri = { + uri: potentialUri, ggufInsights, compatibilityScore }; @@ -582,11 +596,11 @@ async function selectFileForModelRecommendation({ } } - recommendedModelOption.selectedUrl = bestScoreSelectedUrl; - recommendedModelOption.urlSelectionLoadingState = "done"; + recommendedModelOption.selectedUri = bestScoreSelectedUri; + recommendedModelOption.uriSelectionLoadingState = "done"; rerenderOption(); } catch (err) { - recommendedModelOption.urlSelectionLoadingState = "done"; + recommendedModelOption.uriSelectionLoadingState = "done"; rerenderOption(); } } diff --git a/src/cli/utils/printModelDestination.ts b/src/cli/utils/printModelDestination.ts new file mode 100644 index 00000000..ab39d4ab --- /dev/null +++ b/src/cli/utils/printModelDestination.ts @@ -0,0 +1,12 @@ +import chalk from "chalk"; +import {ResolveModelDestination} from "../../utils/resolveModelDestination.js"; +import {getReadablePath} from "./getReadablePath.js"; + +export function printModelDestination(modelDestination: ResolveModelDestination) { + if (modelDestination.type === "url") + console.info(`${chalk.yellow("URL:")} ${modelDestination.url}`); + else if (modelDestination.type === "uri") + console.info(`${chalk.yellow("URI:")} ${modelDestination.uri}`); + else + console.info(`${chalk.yellow("File:")} ${getReadablePath(modelDestination.path)}`); +} diff --git a/src/cli/utils/projectTemplates.ts b/src/cli/utils/projectTemplates.ts index 5a10e445..8f65efc9 100644 --- a/src/cli/utils/projectTemplates.ts +++ b/src/cli/utils/projectTemplates.ts @@ -4,8 +4,8 @@ import fs from "fs-extra"; export const enum ProjectTemplateParameter { ProjectName = "projectName", CurrentModuleVersion = "currentNodeLlamaCppModuleVersion", - ModelUrl = "modelUrl", - ModelFilename = "modelFilename", + ModelUriOrUrl = "modelUriOrUrl", + ModelUriOrFilename = "modelUriOrFilename" } export type PackagedFileEntry = { diff --git a/src/cli/utils/resolveCommandGgufPath.ts b/src/cli/utils/resolveCommandGgufPath.ts index e69f5afb..6c9d638a 100644 --- a/src/cli/utils/resolveCommandGgufPath.ts +++ b/src/cli/utils/resolveCommandGgufPath.ts @@ -1,12 +1,10 @@ -import path from "path"; import process from "process"; import chalk from "chalk"; import fs from "fs-extra"; import {cliModelsDirectory} from "../../config.js"; -import {normalizeGgufDownloadUrl} from "../../gguf/utils/normalizeGgufDownloadUrl.js"; import {Llama} from "../../bindings/Llama.js"; -import {isUrl} from "../../utils/isUrl.js"; import {createModelDownloader} from "../../utils/createModelDownloader.js"; +import {resolveModelDestination} from "../../utils/resolveModelDestination.js"; import {ConsoleInteraction, ConsoleInteractionKey} from "./ConsoleInteraction.js"; import {getReadablePath} from "./getReadablePath.js"; import {interactivelyAskForModel} from "./interactivelyAskForModel.js"; @@ -16,10 +14,8 @@ export async function resolveCommandGgufPath(ggufPath: string | undefined, llama }: { targetDirectory?: string, flashAttention?: boolean } = {}) { - let resolvedGgufPath = ggufPath; - - if (resolvedGgufPath == null) - resolvedGgufPath = await interactivelyAskForModel({ + if (ggufPath == null) + ggufPath = await interactivelyAskForModel({ llama, 
modelsDirectory: targetDirectory, allowLocalModels: true, @@ -27,23 +23,22 @@ export async function resolveCommandGgufPath(ggufPath: string | undefined, llama flashAttention }); - if (!isUrl(resolvedGgufPath)) { + const resolvedModelDestination = resolveModelDestination(ggufPath); + if (resolvedModelDestination.type === "file") { try { - const resolvedPath = path.resolve(process.cwd(), resolvedGgufPath); - - if (await fs.pathExists(resolvedPath)) - return resolvedPath; + if (await fs.pathExists(resolvedModelDestination.path)) + return resolvedModelDestination.path; } catch (err) { - throw new Error(`Invalid path: ${resolvedGgufPath}`); + throw new Error(`Invalid path: ${resolvedModelDestination.path}`); } - throw new Error(`File does not exist: ${path.resolve(process.cwd(), resolvedGgufPath)}`); + throw new Error(`File does not exist: ${resolvedModelDestination.path}`); } - resolvedGgufPath = normalizeGgufDownloadUrl(resolvedGgufPath); - const downloader = await createModelDownloader({ - modelUrl: resolvedGgufPath, + modelUri: resolvedModelDestination.type === "uri" + ? resolvedModelDestination.uri + : resolvedModelDestination.url, dirPath: targetDirectory, headers: fetchHeaders, showCliProgress: true, @@ -95,4 +90,3 @@ export async function resolveCommandGgufPath(ggufPath: string | undefined, llama return downloader.entrypointFilePath; } - diff --git a/src/cli/utils/resolveModelRecommendationFileOptions.ts b/src/cli/utils/resolveModelRecommendationFileOptions.ts index 78ab06bf..a0ddd6f7 100644 --- a/src/cli/utils/resolveModelRecommendationFileOptions.ts +++ b/src/cli/utils/resolveModelRecommendationFileOptions.ts @@ -1,5 +1,13 @@ -import {normalizeGgufDownloadUrl} from "../../gguf/utils/normalizeGgufDownloadUrl.js"; +import {resolveModelDestination} from "../../utils/resolveModelDestination.js"; +export type ModelURI = `${ + `http://${string}/${string}` | + `https://${string}/${string}` | + `hf:${string}/${string}/${string}` | + `huggingface:${string}/${string}/${string}` +}${ + ".gguf" | `.gguf.part${number}of${number}` +}`; export type ModelRecommendation = { name: string, abilities: ("code" | "chat" | "complete" | "infill" | "functionCalling")[], @@ -11,19 +19,19 @@ export type ModelRecommendation = { * will be used (and the rest of the files won't even be tested), * otherwise, the file with the highest compatibility will be used. 
*/ - fileOptions: Array<{ - huggingFace: { - model: `${string}/${string}`, - branch: string, - file: `${string}.gguf` | `${string}.gguf.part${number}of${number}` - } - }> + fileOptions: ModelURI[] }; export function resolveModelRecommendationFileOptions(modelRecommendation: ModelRecommendation) { return modelRecommendation.fileOptions.map((fileOption) => { - return normalizeGgufDownloadUrl( - `https://huggingface.co/${fileOption.huggingFace.model}/resolve/${fileOption.huggingFace.branch}/${fileOption.huggingFace.file}` - ); + const resolvedModelDestination = resolveModelDestination(fileOption, true); + + if (resolvedModelDestination.type === "file") + throw new Error(`File option "${fileOption}" is not a valid model URI`); + + if (resolvedModelDestination.type === "uri") + return resolvedModelDestination.uri; + + return resolvedModelDestination.url; }); } diff --git a/src/gguf/consts.ts b/src/gguf/consts.ts index 9d13378e..d900980e 100644 --- a/src/gguf/consts.ts +++ b/src/gguf/consts.ts @@ -7,7 +7,7 @@ export const ggufDefaultFetchRetryOptions: retry.Options = { maxTimeout: 1000 * 16 } as const; -export const defaultExtraAllocationSize = 1024 * 1024 * 1.5; // 1.5MB +export const defaultExtraAllocationSize = 1024 * 1024 * 4; // 4MB export const noDirectSubNestingGGufMetadataKeys: readonly string[] = [ "general.license", diff --git a/src/gguf/insights/GgufInsightsConfigurationResolver.ts b/src/gguf/insights/GgufInsightsConfigurationResolver.ts index 256f116a..edf56084 100644 --- a/src/gguf/insights/GgufInsightsConfigurationResolver.ts +++ b/src/gguf/insights/GgufInsightsConfigurationResolver.ts @@ -141,19 +141,25 @@ export class GgufInsightsConfigurationResolver { * - Whether all layers can be offloaded to the GPU (gives additional points) * - Whether the resolved context size is at least as large as the specified `contextSize` * - * IF the resolved context size is larger than the specified context size, for each multiplier of the specified `contextSize` + * If the resolved context size is larger than the specified context size, for each multiplier of the specified `contextSize` * that the resolved context size is larger by, 1 bonus point is given in the `bonusScore`. * + * `maximumFittedContextSizeMultiplier` is used to improve the proportionality of the bonus score between models. + * Set this to any value higher than ` / contextSize`. + * Defaults to `100`. + * * `contextSize` defaults to `4096` (if the model train context size is lower than this, the model train context size is used instead). */ public async scoreModelConfigurationCompatibility({ contextSize = Math.min(4096, this._ggufInsights.trainContextSize ?? 4096), embeddingContext = false, - flashAttention = false + flashAttention = false, + maximumFittedContextSizeMultiplier = 100 }: { contextSize?: number, embeddingContext?: boolean, - flashAttention?: boolean + flashAttention?: boolean, + maximumFittedContextSizeMultiplier?: number } = {}, { getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => ({total: os.totalmem(), free: os.freemem()})), @@ -269,7 +275,12 @@ export class GgufInsightsConfigurationResolver { ) ) ); - const bonusContextSizePoints = 10 * Math.min(1, Math.max(0, resolvedContextSize - contextSize) / contextSize); + const bonusContextSizePoints = 10 * Math.min( + 1, + ( + Math.max(0, resolvedContextSize - contextSize) / contextSize + ) / maximumFittedContextSizeMultiplier + ); const compatibilityScore = canUseGpu ? 
( diff --git a/src/gguf/readGgufFileInfo.ts b/src/gguf/readGgufFileInfo.ts index 6947ea82..8ed82ddb 100644 --- a/src/gguf/readGgufFileInfo.ts +++ b/src/gguf/readGgufFileInfo.ts @@ -1,6 +1,7 @@ import retry from "async-retry"; import {isUrl} from "../utils/isUrl.js"; import {ModelFileAccessTokens} from "../utils/modelFileAccesTokens.js"; +import {isModelUri, parseModelUri} from "../utils/parseModelUri.js"; import {parseGguf} from "./parser/parseGguf.js"; import {GgufNetworkFetchFileReader} from "./fileReaders/GgufNetworkFetchFileReader.js"; import {GgufFsFileReader} from "./fileReaders/GgufFsFileReader.js"; @@ -13,10 +14,10 @@ import {GgufFileInfo} from "./types/GgufFileInfoTypes.js"; /** * Read a GGUF file and return its metadata and tensor info (unless `readTensorInfo` is set to `false`). * Only the parts of the file required for the metadata and tensor info are read. - * @param pathOrUrl + * @param pathOrUri * @param options */ -export async function readGgufFileInfo(pathOrUrl: string, { +export async function readGgufFileInfo(pathOrUri: string, { readTensorInfo = true, sourceType, ignoreKeys = [], @@ -70,12 +71,13 @@ export async function readGgufFileInfo(pathOrUrl: string, { tokens?: ModelFileAccessTokens } = {}) { - const useNetworkReader = sourceType === "network" || (sourceType == null && isUrl(pathOrUrl)); + const useNetworkReader = sourceType === "network" || (sourceType == null && (isUrl(pathOrUri) || isModelUri(pathOrUri))); - function createFileReader(pathOrUrl: string) { + function createFileReader(pathOrUri: string) { if (useNetworkReader) { + const parsedModelUri = parseModelUri(pathOrUri); return new GgufNetworkFetchFileReader({ - url: normalizeGgufDownloadUrl(pathOrUrl), + url: parsedModelUri?.resolvedUrl ?? normalizeGgufDownloadUrl(pathOrUri), retryOptions: fetchRetryOptions, headers: fetchHeaders, signal, @@ -83,7 +85,7 @@ export async function readGgufFileInfo(pathOrUrl: string, { }); } else if (sourceType === "filesystem" || sourceType == null) { return new GgufFsFileReader({ - filePath: pathOrUrl, + filePath: pathOrUri, signal }); } @@ -92,8 +94,8 @@ export async function readGgufFileInfo(pathOrUrl: string, { throw new Error(`Unsupported sourceType: ${sourceType}`); } - async function readSingleFile(pathOrUrl: string) { - const fileReader = createFileReader(pathOrUrl); + async function readSingleFile(pathOrUri: string) { + const fileReader = createFileReader(pathOrUri); return await parseGguf({ fileReader, ignoreKeys, @@ -103,9 +105,9 @@ export async function readGgufFileInfo(pathOrUrl: string, { } if (!spliceSplitFiles) - return await readSingleFile(pathOrUrl); + return await readSingleFile(pathOrUri); - const allSplitPartPaths = resolveSplitGgufParts(pathOrUrl); + const allSplitPartPaths = resolveSplitGgufParts(pathOrUri); if (allSplitPartPaths.length === 1) return await readSingleFile(allSplitPartPaths[0]!); diff --git a/src/gguf/utils/resolveBinarySplitGgufPartUrls.ts b/src/gguf/utils/resolveBinarySplitGgufPartUrls.ts index 87ac6113..c4cfd66c 100644 --- a/src/gguf/utils/resolveBinarySplitGgufPartUrls.ts +++ b/src/gguf/utils/resolveBinarySplitGgufPartUrls.ts @@ -40,7 +40,7 @@ export function getFilenameForBinarySplitGgufPartUrls(urls: string[]) { const firstParsedUrl = new URL(urls[0]!); if (binarySplitGgufPartsRegex.test(firstParsedUrl.pathname)) { - const ggufIndex = firstParsedUrl.pathname.indexOf(".gguf"); + const ggufIndex = firstParsedUrl.pathname.toLowerCase().indexOf(".gguf"); const urlWithoutPart = firstParsedUrl.pathname.slice(0, ggufIndex + 
".gguf".length); const filename = decodeURIComponent(urlWithoutPart.split("/").pop()!); diff --git a/src/gguf/utils/resolveSplitGgufParts.ts b/src/gguf/utils/resolveSplitGgufParts.ts index 5455c100..f6c7255e 100644 --- a/src/gguf/utils/resolveSplitGgufParts.ts +++ b/src/gguf/utils/resolveSplitGgufParts.ts @@ -2,18 +2,18 @@ import {isUrl} from "../../utils/isUrl.js"; const splitGgufPartRegex = /-(?\d{5})-of-(?\d{5})\.gguf$/; -export function resolveSplitGgufParts(ggufPathOrUrl: string) { - if (isUrl(ggufPathOrUrl)) { - const parsedUrl = new URL(ggufPathOrUrl); +export function resolveSplitGgufParts(ggufPathOrUri: string) { + if (isUrl(ggufPathOrUri)) { + const parsedUrl = new URL(ggufPathOrUri); return resolveParts(parsedUrl.pathname).map((part) => { - const url = new URL(ggufPathOrUrl); + const url = new URL(ggufPathOrUri); url.pathname = part; return url.href; }); } - return resolveParts(ggufPathOrUrl); + return resolveParts(ggufPathOrUri); } function resolveParts(ggufPath: string) { @@ -68,7 +68,18 @@ export function getGgufSplitPartsInfo(ggufPath: string) { } export function createSplitPartFilename(filename: string, part: number, parts: number) { - if (filename.endsWith(".gguf")) + const splitPartMatch = filename.match(splitGgufPartRegex); + if (splitPartMatch != null) { + const partsInfo = getGgufSplitPartsInfo(filename); + if (partsInfo != null) { + const {matchLength} = partsInfo; + const commonPath = filename.slice(0, filename.length - matchLength); + + filename = commonPath + ".gguf"; + } + } + + if (filename.toLowerCase().endsWith(".gguf")) filename = filename.slice(0, -".gguf".length); return `${filename}-${String(part).padStart(5, "0")}-of-${String(parts).padStart(5, "0")}.gguf`; diff --git a/src/index.ts b/src/index.ts index a8d5454d..286a71eb 100644 --- a/src/index.ts +++ b/src/index.ts @@ -5,6 +5,7 @@ import {NoBinaryFoundError} from "./bindings/utils/NoBinaryFoundError.js"; import { type LlamaGpuType, LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, LlamaVocabularyType } from "./bindings/types.js"; +import {resolveModelFile, type ResolveModelFileOptions} from "./utils/resolveModelFile.js"; import {LlamaModel, LlamaModelInfillTokens, type LlamaModelOptions, LlamaModelTokens} from "./evaluator/LlamaModel/LlamaModel.js"; import {TokenAttributes} from "./evaluator/LlamaModel/utils/TokenAttributes.js"; import {LlamaGrammar, type LlamaGrammarOptions} from "./evaluator/LlamaGrammar.js"; @@ -109,6 +110,8 @@ export { type LlamaGpuType, LlamaLogLevel, NoBinaryFoundError, + resolveModelFile, + type ResolveModelFileOptions, LlamaModel, LlamaModelTokens, LlamaModelInfillTokens, diff --git a/src/utils/createModelDownloader.ts b/src/utils/createModelDownloader.ts index ef19e6ed..5885e033 100644 --- a/src/utils/createModelDownloader.ts +++ b/src/utils/createModelDownloader.ts @@ -2,17 +2,30 @@ import process from "process"; import path from "path"; import {DownloadEngineMultiDownload, DownloadEngineNodejs, downloadFile, downloadSequence} from "ipull"; import fs from "fs-extra"; -import {normalizeGgufDownloadUrl} from "../gguf/utils/normalizeGgufDownloadUrl.js"; import {createSplitPartFilename, resolveSplitGgufParts} from "../gguf/utils/resolveSplitGgufParts.js"; import {getFilenameForBinarySplitGgufPartUrls, resolveBinarySplitGgufPartUrls} from "../gguf/utils/resolveBinarySplitGgufPartUrls.js"; import {cliModelsDirectory, isCI} from "../config.js"; import {safeEventCallback} from "./safeEventCallback.js"; import {ModelFileAccessTokens, 
resolveModelFileAccessTokensTryHeaders} from "./modelFileAccesTokens.js"; import {pushAll} from "./pushAll.js"; +import {resolveModelDestination} from "./resolveModelDestination.js"; -export type ModelDownloaderOptions = { - modelUrl: string, - +export type ModelDownloaderOptions = ({ + /** + * The URI to download the model from. + * + * The supported URI schemes are: + * - **HTTP:** `https://`, `http://` + * - **Hugging Face:** `hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional) + */ + modelUri: string +} | { + /** + * @hidden + * @deprecated Use `modelUri` instead. + */ + modelUrl: string +}) & { /** * The directory to save the model file to. * Default to `node-llama-cpp`'s default global models directory (`~/.node-llama-cpp/models`). @@ -54,18 +67,41 @@ export type ModelDownloaderOptions = { }; /** - * Create a model downloader to download a model from a URL. + * Create a model downloader to download a model from a URI. * Uses [`ipull`](https://github.com/ido-pluto/ipull) to download a model file as fast as possible with parallel connections * and other optimizations. * - * If the url points to a `.gguf` file that is split into multiple parts (for example, `model-00001-of-00009.gguf`), + * If the uri points to a `.gguf` file that is split into multiple parts (for example, `model-00001-of-00009.gguf`), * all the parts will be downloaded to the specified directory. * - * If the url points to a `.gguf` file that is binary split into multiple parts (for example, `model.gguf.part1of9`), + * If the uri points to a `.gguf` file that is binary split into multiple parts (for example, `model.gguf.part1of9`), * all the parts will be spliced into a single file and be downloaded to the specified directory. * - * If the url points to a `.gguf` file that is not split or binary spliced (for example, `model.gguf`), + * If the uri points to a `.gguf` file that is not split or binary spliced (for example, `model.gguf`), * the file will be downloaded to the specified directory.
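A minimal sketch (not part of this diff; the `hf:` URI below is a placeholder) of how the new `ModelDownloaderOptions` union behaves: new code passes `modelUri`, while existing callers that still pass the deprecated `modelUrl` keep working.

```typescript
import {fileURLToPath} from "url";
import path from "path";
import {createModelDownloader} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

// new code: pass a model URI (or a plain URL) via `modelUri`
const downloader = await createModelDownloader({
    modelUri: "hf:user/model/model-file.gguf",
    dirPath: path.join(__dirname, "models")
});

// older code that still uses the deprecated `modelUrl` option keeps compiling and working,
// since the options type accepts either member of the union
const legacyDownloader = await createModelDownloader({
    modelUrl: "https://example.com/model.gguf",
    dirPath: path.join(__dirname, "models")
});

const modelPath = await downloader.download();
const legacyModelPath = await legacyDownloader.download();
```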
+ * + * The supported URI schemes are: + * - **HTTP:** `https://`, `http://` + * - **Hugging Face:** `hf://#` (`#` is optional) + * @example + * ```typescript + * import {fileURLToPath} from "url"; + * import path from "path"; + * import {createModelDownloader, getLlama} from "node-llama-cpp"; + * + * const __dirname = path.dirname(fileURLToPath(import.meta.url)); + * + * const downloader = await createModelDownloader({ + * modelUri: "https://example.com/model.gguf", + * dirPath: path.join(__dirname, "models") + * }); + * const modelPath = await downloader.download(); + * + * const llama = await getLlama(); + * const model = await llama.loadModel({ + * modelPath + * }); + * ``` * @example * ```typescript * import {fileURLToPath} from "url"; @@ -75,7 +111,7 @@ export type ModelDownloaderOptions = { * const __dirname = path.dirname(fileURLToPath(import.meta.url)); * * const downloader = await createModelDownloader({ - * modelUrl: "https://example.com/model.gguf", + * modelUri: "hf:user/model/model-file.gguf", * dirPath: path.join(__dirname, "models") * }); * const modelPath = await downloader.download(); @@ -107,11 +143,11 @@ export async function createModelDownloader(options: ModelDownloaderOptions) { * * const downloaders = [ * createModelDownloader({ - * modelUrl: "https://example.com/model1.gguf", + * modelUri: "https://example.com/model1.gguf", * dirPath: path.join(__dirname, "models") * }), * createModelDownloader({ - * modelUrl: "https://example.com/model2.gguf", + * modelUri: "hf:user/model/model2.gguf", * dirPath: path.join(__dirname, "models") * }) * ]; @@ -160,16 +196,31 @@ export class ModelDownloader { /** @internal */ private _totalFiles?: number; /** @internal */ private _tryHeaders: Record[] = []; - private constructor({ - modelUrl, dirPath = cliModelsDirectory, fileName, headers, showCliProgress = false, onProgress, deleteTempFileOnCancel = true, - skipExisting = true, parallelDownloads = 4, tokens - }: ModelDownloaderOptions) { - if (modelUrl == null || dirPath == null) - throw new Error("modelUrl and dirPath cannot be null"); + private constructor(options: ModelDownloaderOptions) { + const { + modelUri, modelUrl, + dirPath = cliModelsDirectory, fileName, headers, showCliProgress = false, onProgress, deleteTempFileOnCancel = true, + skipExisting = true, parallelDownloads = 4, tokens + } = options as ModelDownloaderOptions & { + modelUri: string, + modelUrl: string + }; + const resolvedModelUri = modelUri || modelUrl; - this._modelUrl = normalizeGgufDownloadUrl(modelUrl); + if (resolvedModelUri == null || dirPath == null) + throw new Error("modelUri and dirPath cannot be null"); + + const resolvedModelDestination = resolveModelDestination(resolvedModelUri); + + this._modelUrl = resolvedModelDestination.type === "file" + ? path.join(dirPath, resolvedModelUri) + : resolvedModelDestination.url; this._dirPath = path.resolve(process.cwd(), dirPath); - this._fileName = fileName; + this._fileName = fileName || ( + resolvedModelDestination.type === "uri" + ? 
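// For "hf:" URIs, `parsedUri.fullFilename` is the prefixed name built by `parseModelUri`
// (filePrefix + filename, e.g. "hf_mradermacher_Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf" in the tests below),
// so a model downloaded via a URI gets a deterministic local filename even when no `fileName` option is given.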
resolvedModelDestination.parsedUri.fullFilename + : undefined + ); this._headers = headers; this._showCliProgress = showCliProgress; this._onProgress = safeEventCallback(onProgress); diff --git a/src/utils/isUrl.ts b/src/utils/isUrl.ts index 5b5fa31a..ac6038d5 100644 --- a/src/utils/isUrl.ts +++ b/src/utils/isUrl.ts @@ -1,5 +1,5 @@ export function isUrl(text: string, throwOnInvalidUrl: boolean = true) { - if (text.startsWith("http://") || text.startsWith("https://")) { + if (text.toLowerCase().startsWith("http://") || text.toLowerCase().startsWith("https://")) { try { new URL(text); return true; diff --git a/src/utils/modelFileAccesTokens.ts b/src/utils/modelFileAccesTokens.ts index 3dbbfef3..86625a7a 100644 --- a/src/utils/modelFileAccesTokens.ts +++ b/src/utils/modelFileAccesTokens.ts @@ -2,6 +2,7 @@ import process from "process"; import path from "path"; import os from "os"; import fs from "fs-extra"; +import {isUrl} from "./isUrl.js"; export type ModelFileAccessTokens = { huggingFace?: string @@ -14,7 +15,7 @@ export async function resolveModelFileAccessTokensTryHeaders( ) { const res: Record[] = []; - if (tokens == null) + if (tokens == null || !isUrl(modelUrl)) return res; const parsedUrl = new URL(modelUrl); diff --git a/src/utils/parseModelUri.ts b/src/utils/parseModelUri.ts new file mode 100644 index 00000000..4e6312d3 --- /dev/null +++ b/src/utils/parseModelUri.ts @@ -0,0 +1,171 @@ +import filenamify from "filenamify"; +import {normalizeGgufDownloadUrl} from "../gguf/utils/normalizeGgufDownloadUrl.js"; +import {getFilenameForBinarySplitGgufPartUrls, resolveBinarySplitGgufPartUrls} from "../gguf/utils/resolveBinarySplitGgufPartUrls.js"; +import {getGgufSplitPartsInfo} from "../gguf/utils/resolveSplitGgufParts.js"; +import {isUrl} from "./isUrl.js"; + +const defaultHuggingFaceBranch = "main"; + +export type ParseModelUri = { + uri: string, + resolvedUrl: string, + filePrefix: string, + filename: string, + fullFilename: string +}; + +export function parseModelUri(urlOrUri: string, convertUrlToSupportedUri: boolean = false): ParseModelUri | null { + if (urlOrUri.startsWith("hf:")) + return parseHuggingFaceUriContent(urlOrUri.slice("hf:".length)); + else if (urlOrUri.startsWith("huggingface:")) + return parseHuggingFaceUriContent(urlOrUri.slice("huggingface:".length)); + + if (convertUrlToSupportedUri && isUrl(urlOrUri)) { + const parsedUrl = new URL(normalizeGgufDownloadUrl(urlOrUri)); + if (parsedUrl.hostname === "huggingface.co") { + const pathnameParts = parsedUrl.pathname.split("/"); + const [, user, model, resolve, branch, ...pathParts] = pathnameParts; + const filePath = pathParts.join("/"); + + if (user != null && model != null && resolve === "resolve" && branch != null && filePath !== "") { + return parseHuggingFaceUriContent([ + decodeURIComponent(user), + "/", decodeURIComponent(model), "/", + filePath + .split("/") + .map((part) => decodeURIComponent(part)) + .join("/"), + branch !== defaultHuggingFaceBranch + ? 
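// When converting a huggingface.co URL back into an "hf:" URI (convertUrlToSupportedUri === true),
// a non-default branch is kept as a trailing "#<branch>" fragment, while the default "main" branch
// is dropped so the resulting URI stays in its canonical short form.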
`#${decodeURIComponent(branch)}` + : "" + ].join("")); + } + } + } + + return null; +} + +export function isModelUri(modelUri: string) { + try { + return parseModelUri(modelUri) != null; + } catch { + return false; + } +} + +function parseHuggingFaceUriContent(uri: string): ParseModelUri { + const [user, model, ...pathParts] = uri.split("/"); + let rest = pathParts.join("/"); + const hashIndex = rest.indexOf("#"); + let branch = defaultHuggingFaceBranch; + + if (hashIndex >= 0) { + branch = rest.slice(hashIndex + "#".length); + rest = rest.slice(0, hashIndex); + + if (branch === "") + branch = defaultHuggingFaceBranch; + } + + const filePathParts = rest.split("/"); + const filePath = filePathParts + .map((part) => encodeURIComponent(part)) + .join("/"); + + if (!user || !model || filePath === "") + throw new Error(`Invalid Hugging Face URI: ${uri}`); + + const resolvedUrl = normalizeGgufDownloadUrl([ + "https://huggingface.co/", encodeURIComponent(user), + "/", encodeURIComponent(model), + "/resolve/", encodeURIComponent(branch), + "/", filePath, "?download=true" + ].join("")); + + function buildFilePrefix(user: string, model: string, branch: string, pathParts: string[], filename: string) { + const res: string[] = ["hf"]; + + res.push(filenamify(user)); + + if (!doesFilenameMatchExactModelName(filename, model) || branch !== defaultHuggingFaceBranch) + res.push(filenamify(model)); + + if (branch !== defaultHuggingFaceBranch) + res.push(filenamify(branch)); + + if (pathParts.length > 0) { + if (doesFilenameMatchExactFolderName(filename, pathParts.at(-1)!)) + pathParts = pathParts.slice(0, -1); + + if (pathParts.length > 0) + res.push(filenamify(pathParts.join("__"))); + } + + return res.join("_") + "_"; + } + + const filename = resolveModelFilenameFromUrl(resolvedUrl)!; + const filePrefix = buildFilePrefix(user, model, branch, filePathParts.slice(0, -1), filename); + return { + uri: `hf:${user}/${model}/${filePathParts.join("/")}${branch !== defaultHuggingFaceBranch ? 
`#${branch}` : ""}`, + resolvedUrl, + filePrefix, + filename, + fullFilename: `${filePrefix}${filename}` + }; +} + +function resolveModelFilenameFromUrl(modelUrl: string) { + const binarySplitPartUrls = resolveBinarySplitGgufPartUrls(modelUrl); + + if (binarySplitPartUrls instanceof Array) + return getFilenameForBinarySplitGgufPartUrls(binarySplitPartUrls); + + const parsedUrl = new URL(modelUrl); + const ggufIndex = parsedUrl.pathname.toLowerCase().indexOf(".gguf"); + const urlWithoutPart = parsedUrl.pathname.slice(0, ggufIndex + ".gguf".length); + + const filename = decodeURIComponent(urlWithoutPart.split("/").pop()!); + + return filenamify(filename); +} + +function doesFilenameMatchExactModelName(filename: string, modelName: string) { + if (!modelName.toLowerCase().endsWith("-gguf") || !filename.toLowerCase().endsWith(".gguf")) + return false; + + const modelNameWithoutGguf = modelName.slice(0, -"-gguf".length); + const filenameWithoutGguf = filename.slice(0, -".gguf".length); + + if (filenameWithoutGguf.toLowerCase().startsWith(modelNameWithoutGguf.toLowerCase())) + return true; + + const splitPartsInfo = getGgufSplitPartsInfo(filename); + if (splitPartsInfo == null) + return false; + + const {matchLength} = splitPartsInfo; + const filenameWithoutGgufAndWithoutSplitParts = filename.slice(0, filename.length - matchLength); + + return filenameWithoutGgufAndWithoutSplitParts.toLowerCase().startsWith(modelNameWithoutGguf.toLowerCase()); +} + +function doesFilenameMatchExactFolderName(filename: string, folderName: string) { + if (!filename.toLowerCase().endsWith(".gguf")) + return false; + + const filenameWithoutGguf = filename.slice(0, -".gguf".length); + + if (folderName.toLowerCase() === filenameWithoutGguf.toLowerCase()) + return true; + + const splitPartsInfo = getGgufSplitPartsInfo(filename); + if (splitPartsInfo == null) + return false; + + const {matchLength} = splitPartsInfo; + const filenameWithoutGgufAndWithoutSplitParts = filename.slice(0, filename.length - matchLength); + + return folderName.toLowerCase() === filenameWithoutGgufAndWithoutSplitParts.toLowerCase(); +} diff --git a/src/utils/resolveModelDestination.ts b/src/utils/resolveModelDestination.ts new file mode 100644 index 00000000..6e766541 --- /dev/null +++ b/src/utils/resolveModelDestination.ts @@ -0,0 +1,44 @@ +import path from "path"; +import {normalizeGgufDownloadUrl} from "../gguf/utils/normalizeGgufDownloadUrl.js"; +import {ParseModelUri, parseModelUri} from "./parseModelUri.js"; +import {isUrl} from "./isUrl.js"; + +export type ResolveModelDestination = { + type: "url", + url: string +} | { + type: "uri", + url: string, + uri: string, + parsedUri: ParseModelUri +} | { + type: "file", + path: string +}; + +export function resolveModelDestination(modelDestination: string, convertUrlToUri: boolean = false): ResolveModelDestination { + const parsedUri = parseModelUri(modelDestination, convertUrlToUri); + + if (parsedUri != null) { + return { + type: "uri", + url: parsedUri.resolvedUrl, + uri: parsedUri.uri, + parsedUri + }; + } else if (isUrl(modelDestination)) { + return { + type: "url", + url: normalizeGgufDownloadUrl(modelDestination) + }; + } + + try { + return { + type: "file", + path: path.resolve(process.cwd(), modelDestination) + }; + } catch (err) { + throw new Error(`Invalid path: ${modelDestination}`); + } +} diff --git a/src/utils/resolveModelFile.ts b/src/utils/resolveModelFile.ts new file mode 100644 index 00000000..79d84d10 --- /dev/null +++ b/src/utils/resolveModelFile.ts @@ -0,0 +1,262 @@ +import 
path from "path"; +import fs from "fs-extra"; +import chalk from "chalk"; +import {cliModelsDirectory} from "../config.js"; +import {getReadablePath} from "../cli/utils/getReadablePath.js"; +import {resolveSplitGgufParts} from "../gguf/utils/resolveSplitGgufParts.js"; +import {resolveModelDestination} from "./resolveModelDestination.js"; +import {ModelFileAccessTokens} from "./modelFileAccesTokens.js"; +import {createModelDownloader} from "./createModelDownloader.js"; + +export type ResolveModelFileOptions = { + /** + * The directory to resolve models from, and download models to. + * + * Default to `node-llama-cpp`'s default global models directory (`~/.node-llama-cpp/models`). + */ + directory?: string, + + /** + * When downloading a model file, whether to download the file if it doesn't exist. + * + * - `"auto"`: Download the file if it doesn't exist + * - `false`: Don't download the file if it doesn't exist. Implies `verify: false` even if `verify` is set to `true`. + * + * Defaults to `"auto"`. + */ + download?: "auto" | false, + + /** + * When an existing model file that corresponds to the URI is found, + * verify that it matches the expected size of the remote file. + * + * Defaults to `false`. + */ + verify?: boolean, + + /** + * The name of the file to be resolved. + * + * If provided and existing file is found with the same name, it will be returned. + * + * If provided and no existing file is found with the same name, the file will be downloaded with the provided name. + */ + fileName?: string, + + /** + * Additional headers to use when downloading a model file. + */ + headers?: Record, + + /** + * When downloading a model file, show the download progress. + * + * Defaults to `true`. + */ + cli?: boolean, + + /** + * When downloading a model file, called on download progress + */ + onProgress?: (status: {totalSize: number, downloadedSize: number}) => void, + + /** + * If true, the temporary file will be deleted if the download is canceled. + * + * Defaults to `true`. + */ + deleteTempFileOnCancel?: boolean, + + /** + * The number of parallel downloads to use when downloading split files. + * + * Defaults to `4`. + */ + parallel?: number, + + /** + * Tokens to use to access the remote model file when downloading. + */ + tokens?: ModelFileAccessTokens, + + /** + * The signal to use to cancel a download. + */ + signal?: AbortSignal +}; + +/** + * Resolves a local model file path from a URI or file path, and downloads the necessary files first if needed. + * + * If a URL or a URI is given, it'll be resolved to the corresponding file path. + * If the file path exists, it will be returned, otherwise it will be downloaded and then be returned. + * + * If a file path is given, and the path exists, it will be returned, otherwise an error will be thrown. + * + * Files are resolved from and downloaded to the `directory` option, + * which defaults to `node-llama-cpp`'s default global models directory (`~/.node-llama-cpp/models`). + * + * Set the `cli` option to `false` to hide the download progress from the console. 
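The `@example` blocks that follow use the string-directory shorthand; as a rough sketch (not part of this diff; the URI below is a placeholder), the options-object form described above can be used like this:

```typescript
import {fileURLToPath} from "url";
import path from "path";
import {resolveModelFile} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const abortController = new AbortController();

// resolve (and download if needed) while observing progress, with the ability to cancel
const modelPath = await resolveModelFile("hf:user/model/model-file.gguf", {
    directory: path.join(__dirname, "models"),
    cli: false, // hide the CLI download progress
    onProgress({totalSize, downloadedSize}) {
        console.log(`downloaded ${downloadedSize} of ${totalSize} bytes`);
    },
    signal: abortController.signal
});

console.log("Model file:", modelPath);
```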
+ * @example + * ```typescript + * import {fileURLToPath} from "url"; + * import path from "path"; + * import {getLlama, resolveModelFile} from "node-llama-cpp"; + * + * const __dirname = path.dirname(fileURLToPath(import.meta.url)); + * + * // resolve a model from Hugging Face to the models directory + * const modelPath = await resolveModelFile( + * "hf:user/model/model-file.gguf", + * path.join(__dirname, "models") + * ); + * + * const llama = await getLlama(); + * const model = await llama.loadModel({modelPath}); + * ``` + * @example + * ```typescript + * import {fileURLToPath} from "url"; + * import path from "path"; + * import {getLlama, resolveModelFile} from "node-llama-cpp"; + * + * const __dirname = path.dirname(fileURLToPath(import.meta.url)); + * + * // resolve a model from a URL to the models directory + * const modelPath = await resolveModelFile( + * "https://example.com/model.gguf", + * path.join(__dirname, "models") + * ); + * + * const llama = await getLlama(); + * const model = await llama.loadModel({modelPath}); + * ``` + * @example + * ```typescript + * import {fileURLToPath} from "url"; + * import path from "path"; + * import {getLlama, resolveModelFile} from "node-llama-cpp"; + * + * const __dirname = path.dirname(fileURLToPath(import.meta.url)); + * + * // resolve a local model that is in the models directory + * const modelPath = await resolveModelFile( + * "model.gguf", + * path.join(__dirname, "models") + * ); + * + * const llama = await getLlama(); + * const model = await llama.loadModel({modelPath}); + * ``` + * @returns The resolved model file path + */ +export async function resolveModelFile( + uriOrPath: string, + optionsOrDirectory?: ResolveModelFileOptions | string +): Promise { + const { + directory, + download = "auto", + verify = false, + fileName, + headers, + cli = true, + onProgress, + deleteTempFileOnCancel = true, + parallel = 4, + tokens, + signal + } = typeof optionsOrDirectory === "string" + ? {directory: optionsOrDirectory} + : (optionsOrDirectory ?? {}); + + const resolvedDirectory = directory || cliModelsDirectory; + const resolvedCli = cli == null ? true : cli; + let resolvedVerify = verify ?? false; + + if (download === false) + resolvedVerify = false; + + const resolvedModelDestination = resolveModelDestination(uriOrPath); + + if (resolvedModelDestination.type === "file") { + const resolvedFilePath = path.resolve(resolvedDirectory, uriOrPath); + + if (await fs.pathExists(resolvedFilePath)) + return resolvedFilePath; + + throw new Error(`No model file found at "${resolvedFilePath}"`); + } + + let expectedFilePath: string | undefined = fileName != null + ? 
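// Resolution order for the expected local file path: an explicit `fileName` option wins;
// otherwise, "hf:" URIs use the prefixed `fullFilename` from `parseModelUri`;
// plain URLs are re-resolved as a URI (resolveModelDestination(uriOrPath, true)) to derive a filename when possible.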
path.join(resolvedDirectory, fileName) + : undefined; + + if (expectedFilePath == null && resolvedModelDestination.type === "uri") + expectedFilePath = path.join(resolvedDirectory, resolvedModelDestination.parsedUri.fullFilename); + else if (expectedFilePath == null && resolvedModelDestination.type === "url") { + const enforcedParsedUrl = resolveModelDestination(uriOrPath, true); + if (enforcedParsedUrl != null && enforcedParsedUrl.type === "uri") + expectedFilePath = path.join(resolvedDirectory, enforcedParsedUrl.parsedUri.filename); + } + + if (expectedFilePath != null && !resolvedVerify && await fs.pathExists(expectedFilePath)) { + const allGgufParts = resolveSplitGgufParts(expectedFilePath); + if (allGgufParts.length === 1 && allGgufParts[0] === expectedFilePath) + return expectedFilePath; + + const allPartsExist = await Promise.all(allGgufParts.map((part) => fs.pathExists(part))); + if (allGgufParts.length > 0) { + if (allPartsExist.every((exists) => exists)) + return allGgufParts[0]!; + else if (download === false) + throw new Error(`Not all split parts of the model file "${allGgufParts[0]}" are present in the same directory`); + } + } + + if (download === false) { + if (expectedFilePath != null) + throw new Error(`No model file found at "${expectedFilePath}" and download is disabled`); + + throw new Error(`No model file found for "${uriOrPath}" and download is disabled`); + } + + const downloader = await createModelDownloader({ + modelUri: resolvedModelDestination.type === "uri" + ? resolvedModelDestination.uri + : resolvedModelDestination.url, + dirPath: resolvedDirectory, + headers, + showCliProgress: resolvedCli, + deleteTempFileOnCancel, + skipExisting: true, + fileName: fileName || undefined, + parallelDownloads: parallel, + onProgress, + tokens + }); + + if (expectedFilePath != null && downloader.totalFiles === 1 && await fs.pathExists(downloader.entrypointFilePath)) { + const fileStats = await fs.stat(expectedFilePath); + + if (downloader.totalSize === fileStats.size) { + await downloader.cancel({deleteTempFile: false}); + console.log("download canceled"); + return expectedFilePath; + } + } + + if (resolvedCli) + console.info(`Downloading to ${chalk.yellow(getReadablePath(resolvedDirectory))}${ + downloader.splitBinaryParts != null + ? 
chalk.gray(` (combining ${downloader.splitBinaryParts} parts into a single file)`) + : "" + }`); + + await downloader.download({signal}); + + if (resolvedCli) + console.info(`Downloaded to ${chalk.yellow(getReadablePath(downloader.entrypointFilePath))}`); + + return downloader.entrypointFilePath; +} diff --git a/templates/electron-typescript-react/package.json b/templates/electron-typescript-react/package.json index 472cfed8..db3a3e91 100644 --- a/templates/electron-typescript-react/package.json +++ b/templates/electron-typescript-react/package.json @@ -11,7 +11,7 @@ }, "scripts": { "_postinstall": "npm run models:pull", - "models:pull": "node-llama-cpp pull --dir ./models \"{{modelUrl|escape|escape}}\"", + "models:pull": "node-llama-cpp pull --dir ./models \"{{modelUriOrUrl|escape|escape}}\"", "start": "vite dev", "start:build": "electron ./dist-electron", "prebuild": "rimraf ./dist ./dist-electron ./release", diff --git a/templates/node-typescript/package.json b/templates/node-typescript/package.json index d04d99a1..de3c0be9 100644 --- a/templates/node-typescript/package.json +++ b/templates/node-typescript/package.json @@ -23,7 +23,7 @@ }, "scripts": { "_postinstall": "npm run models:pull", - "models:pull": "node-llama-cpp pull --dir ./models \"{{modelUrl|escape|escape}}\"", + "models:pull": "node-llama-cpp pull --dir ./models \"{{modelUriOrUrl|escape|escape}}\"", "start": "vite-node ./src/index.ts", "start:build": "node ./dist/index.ts", "prebuild": "rimraf ./dist ./tsconfig.tsbuildinfo", diff --git a/templates/node-typescript/src/index.ts b/templates/node-typescript/src/index.ts index d0662c0d..237877f2 100644 --- a/templates/node-typescript/src/index.ts +++ b/templates/node-typescript/src/index.ts @@ -1,18 +1,22 @@ import {fileURLToPath} from "url"; import path from "path"; import chalk from "chalk"; -import {getLlama, LlamaChatSession} from "node-llama-cpp"; +import {getLlama, LlamaChatSession, resolveModelFile} from "node-llama-cpp"; const __dirname = path.dirname(fileURLToPath(import.meta.url)); -const modelsFolderDirectory = path.join(__dirname, "..", "models"); +const modelsDirectory = path.join(__dirname, "..", "models"); const llama = await getLlama(); +console.log(chalk.yellow("Resolving model file...")); +const modelPath = await resolveModelFile( + "{{modelUriOrFilename|escape}}", + modelsDirectory +); + console.log(chalk.yellow("Loading model...")); -const model = await llama.loadModel({ - modelPath: path.join(modelsFolderDirectory, "{{modelFilename|escape}}") -}); +const model = await llama.loadModel({modelPath}); console.log(chalk.yellow("Creating context...")); const context = await model.createContext(); diff --git a/test/standalone/utils/createSplitPartFilename.test.ts b/test/standalone/utils/createSplitPartFilename.test.ts new file mode 100644 index 00000000..b00862be --- /dev/null +++ b/test/standalone/utils/createSplitPartFilename.test.ts @@ -0,0 +1,25 @@ +import {describe, expect, test} from "vitest"; +import {createSplitPartFilename} from "../../../src/gguf/utils/resolveSplitGgufParts.js"; + + +describe("utils", () => { + describe("createSplitPartFilename", () => { + test("simple initial part filename", async () => { + const partFilename = createSplitPartFilename("Meta-Llama-3.1-70B-Instruct-Q5_K_L.gguf", 3, 10); + + expect(partFilename).to.eql("Meta-Llama-3.1-70B-Instruct-Q5_K_L-00003-of-00010.gguf"); + }); + + test("initial part filename with existing parts information", async () => { + const partFilename = 
createSplitPartFilename("Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf", 3, 10); + + expect(partFilename).to.eql("Meta-Llama-3.1-70B-Instruct-Q5_K_L-00003-of-00010.gguf"); + }); + + test("initial part filename with existing parts information 2", async () => { + const partFilename = createSplitPartFilename("Meta-Llama-3.1-70B-Instruct-Q5_K_L-00002-of-00002.gguf", 3, 10); + + expect(partFilename).to.eql("Meta-Llama-3.1-70B-Instruct-Q5_K_L-00003-of-00010.gguf"); + }); + }); +}); diff --git a/test/standalone/utils/parseModelUri.test.ts b/test/standalone/utils/parseModelUri.test.ts new file mode 100644 index 00000000..d7e3ec5a --- /dev/null +++ b/test/standalone/utils/parseModelUri.test.ts @@ -0,0 +1,150 @@ +import {fileURLToPath} from "url"; +import {describe, expect, test} from "vitest"; +import {parseModelUri} from "../../../src/utils/parseModelUri.js"; + +const __filename = fileURLToPath(import.meta.url); + +describe("utils", () => { + describe("parseModelUri", () => { + test("File path is not resolved", async () => { + const parsedModelUri = parseModelUri(__filename); + + expect(parsedModelUri).to.eql(null); + }); + + test("URL is not resolved by default", async () => { + const parsedModelUri = parseModelUri( + "https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf" + ); + + expect(parsedModelUri).to.eql(null); + }); + + test("Hugging Face URL is resolved", async () => { + const parsedModelUri = parseModelUri( + "https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + true + ); + + expect(parsedModelUri).toMatchInlineSnapshot(` + { + "filePrefix": "hf_mradermacher_", + "filename": "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + "fullFilename": "hf_mradermacher_Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + "resolvedUrl": "https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf?download=true", + "uri": "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + } + `); + }); + + test("Hugging Face URL is resolved 2", async () => { + const parsedModelUri = parseModelUri( + "https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf?download=true", + true + ); + + expect(parsedModelUri).toMatchInlineSnapshot(` + { + "filePrefix": "hf_mradermacher_", + "filename": "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + "fullFilename": "hf_mradermacher_Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + "resolvedUrl": "https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf?download=true", + "uri": "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + } + `); + }); + + test("Hugging Face URL is resolved 3", async () => { + const parsedModelUri = parseModelUri( + "https://huggingface.co/bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/blob/main/Meta-Llama-3.1-70B-Instruct-Q5_K_L/Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf", + true + ); + + expect(parsedModelUri).toMatchInlineSnapshot(` + { + "filePrefix": "hf_bartowski_", + "filename": "Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf", + "fullFilename": "hf_bartowski_Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf", + "resolvedUrl": 
"https://huggingface.co/bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct-Q5_K_L/Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf?download=true", + "uri": "hf:bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct-Q5_K_L/Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf", + } + `); + }); + + test("Hugging Face URL is resolved 4", async () => { + const parsedModelUri = parseModelUri( + "https://huggingface.co/bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct-Q5_K_L/Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf?download=true", + true + ); + + expect(parsedModelUri).toMatchInlineSnapshot(` + { + "filePrefix": "hf_bartowski_", + "filename": "Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf", + "fullFilename": "hf_bartowski_Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf", + "resolvedUrl": "https://huggingface.co/bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct-Q5_K_L/Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf?download=true", + "uri": "hf:bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct-Q5_K_L/Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf", + } + `); + }); + + test("Hugging Face URL is resolved 5", async () => { + const parsedModelUri = parseModelUri( + "https://huggingface.co/mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/blob/main/Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2", + true + ); + + expect(parsedModelUri).toMatchInlineSnapshot(` + { + "filePrefix": "hf_mradermacher_", + "filename": "Meta-Llama-3.1-70B-Instruct.Q8_0.gguf", + "fullFilename": "hf_mradermacher_Meta-Llama-3.1-70B-Instruct.Q8_0.gguf", + "resolvedUrl": "https://huggingface.co/mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2?download=true", + "uri": "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2", + } + `); + }); + + test("Hugging Face URI is resolved", async () => { + const parsedModelUri = parseModelUri("hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf"); + + expect(parsedModelUri).toMatchInlineSnapshot(` + { + "filePrefix": "hf_mradermacher_", + "filename": "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + "fullFilename": "hf_mradermacher_Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + "resolvedUrl": "https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf?download=true", + "uri": "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + } + `); + }); + + test("Hugging Face URI is resolved 2", async () => { + const parsedModelUri = parseModelUri("hf:bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct-Q5_K_L/Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf"); + + expect(parsedModelUri).toMatchInlineSnapshot(` + { + "filePrefix": "hf_bartowski_", + "filename": "Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf", + "fullFilename": "hf_bartowski_Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf", + "resolvedUrl": "https://huggingface.co/bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct-Q5_K_L/Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf?download=true", + "uri": "hf:bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct-Q5_K_L/Meta-Llama-3.1-70B-Instruct-Q5_K_L-00001-of-00002.gguf", + } + `); + }); + + 
test("Hugging Face URI is resolved 3", async () => { + const parsedModelUri = parseModelUri("hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2"); + + expect(parsedModelUri).toMatchInlineSnapshot(` + { + "filePrefix": "hf_mradermacher_", + "filename": "Meta-Llama-3.1-70B-Instruct.Q8_0.gguf", + "fullFilename": "hf_mradermacher_Meta-Llama-3.1-70B-Instruct.Q8_0.gguf", + "resolvedUrl": "https://huggingface.co/mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2?download=true", + "uri": "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2", + } + `); + }); + }); +});