feat: parallel function calling #225

Merged · 39 commits · Jun 8, 2024

Commits
23c3a05
refactor: split `LlamaChat` implementation into smaller functions
giladgd May 23, 2024
86e86ac
feat: preload prompt and complete a preloaded prompt
giladgd May 24, 2024
21629bc
chore: remove redundant setting in script
giladgd May 24, 2024
c578ddb
fix: bug
giladgd May 24, 2024
2d38a7e
feat: prompt completion engine
giladgd May 25, 2024
2ea5265
feat: add prompt completion to the Electron example
giladgd May 25, 2024
33c7360
test: fix test
giladgd May 25, 2024
3dd6db1
chore: update some dev dependencies
giladgd May 25, 2024
3da99a0
feat: add action recommendations to the electron example
giladgd May 25, 2024
68395b1
feat: build the electron example and add it to the published GitHub r…
giladgd May 25, 2024
e55c858
feat: parallel function calling, chat wrapper based system message su…
giladgd Jun 3, 2024
0f9b7f3
feat: model compatibility warnings
giladgd Jun 3, 2024
ca93c0b
fix: improve CUDA detection on Windows
giladgd Jun 3, 2024
c7957d3
fix: bugs
giladgd Jun 3, 2024
d664277
feat: parallel model downloads
giladgd Jun 3, 2024
e624b5f
fix: small performance improvement
giladgd Jun 3, 2024
b79a950
test: update tests
giladgd Jun 3, 2024
859b2a2
feat: support Functionary `v2.llama3`
giladgd Jun 3, 2024
ecb25e1
fix: bugs
giladgd Jun 3, 2024
c8ba7eb
test: update models used in tests
giladgd Jun 3, 2024
92ab710
chore: adapt to `llama.cpp` breaking changes
giladgd Jun 4, 2024
0df534b
test: update tests
giladgd Jun 5, 2024
2e4fca6
feat: call function calling handle functions earlier
giladgd Jun 5, 2024
be06e0b
feat: improve function calling with plain Llama 3 Instruct
giladgd Jun 5, 2024
2edd7f6
feat: parallel function calling with plain Llama 3 Instruct
giladgd Jun 5, 2024
0bc4542
fix: bugs
giladgd Jun 5, 2024
8066f08
test: test tokenizer
giladgd Jun 5, 2024
9d2959d
refactor: make `functionCallMessageTemplate` an object
giladgd Jun 5, 2024
c9dd113
feat: improve function calling syntax for default chat wrapper
giladgd Jun 5, 2024
20f03d6
style: lint
giladgd Jun 5, 2024
ea0ba1f
fix: bugs
giladgd Jun 5, 2024
a211211
chore: update recommended models
giladgd Jun 6, 2024
f3d0475
fix: bugs
giladgd Jun 8, 2024
9eef8d4
chore: fix CUDA build
giladgd Jun 8, 2024
22ee910
test: fix tests
giladgd Jun 8, 2024
cf6242b
test: fix tests
giladgd Jun 8, 2024
57d7392
feat: `customStopTriggers` for `LlamaCompletion`
giladgd Jun 8, 2024
83e8613
fix: export all referenced types
giladgd Jun 8, 2024
5132503
fix: docs compilation
giladgd Jun 8, 2024
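
To make the headline feature concrete, here is a minimal sketch of parallel function calling through the library's chat session API, in roughly the shape it takes around this release. The model path and the function definition are illustrative placeholders, not part of this PR, and exact option names should be checked against the released docs:

import {getLlama, LlamaChatSession, defineChatSessionFunction} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: "./models/functionary-small.gguf" // hypothetical path, for illustration
});
const context = await model.createContext();
const session = new LlamaChatSession({contextSequence: context.getSequence()});

// An illustrative function the model may call. With parallel function calling,
// a single generation pass can request this function for several cities at once,
// instead of one call per model round trip.
const functions = {
    getCityTemperature: defineChatSessionFunction({
        description: "Get the current temperature in a city, in Celsius",
        params: {
            type: "object",
            properties: {
                city: {type: "string"}
            }
        },
        handler({city}) {
            return {city, temperature: 20}; // stub result for the sketch
        }
    })
};

const answer = await session.prompt(
    "What are the temperatures in Paris and Tokyo right now?",
    {functions}
);
console.log(answer);

With a chat wrapper that supports it (such as Functionary v2 or plain Llama 3 Instruct, per the commits above), both temperature lookups can be resolved within the same model pass.
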
Files changed
3 changes: 2 additions & 1 deletion .config/typedoc.json
@@ -24,5 +24,6 @@
"enumMembersFormat": "table",
"typeDeclarationFormat": "list",
"sort": ["source-order"],
"docsRoot": "../docs"
"docsRoot": "../docs",
"intentionallyNotExported": ["MergeOptionalUnionTypes", "GbnfJsonSchemaToTSType", "_LlamaText"]
}
73 changes: 67 additions & 6 deletions .github/workflows/build.yml
@@ -67,7 +67,7 @@ jobs:
generators: "Ninja"
artifact: "linux"
- name: "macOS Clang"
- os: macos-12
+ os: macos-13
cc: "clang"
cxx: "clang++"
generators: "Xcode"
@@ -110,18 +110,18 @@ jobs:

- name: Install Cuda on Windows
if: startsWith(matrix.config.os, 'windows')
uses: Jimver/[email protected].11
uses: Jimver/[email protected].15
with:
- cuda: '12.2.0'
+ cuda: '12.4.1'
method: 'network'
sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
use-local-cache: false

- name: Install Cuda on Ubuntu
if: startsWith(matrix.config.name, 'Ubuntu GCC')
uses: Jimver/[email protected].11
uses: Jimver/[email protected].15
with:
- cuda: '12.2.0'
+ cuda: '12.4.1'
method: 'network'

- name: Install Vulkan SDK on Windows
@@ -198,7 +198,7 @@ jobs:
async function buildBinary(arch, flags = [], nodeTarget = nodeVersion) {
console.log(`Building ${arch} for node ${nodeTarget} with flags`, flags);

- await $`node ./dist/cli/cli.js build --noUsageExample --arch ${arch} --nodeTarget ${nodeVersion} ${flags}`;
+ await $`node ./dist/cli/cli.js build --ciMode --noUsageExample --arch ${arch} --nodeTarget ${nodeVersion} ${flags}`;
}

// build binaries
@@ -343,6 +343,8 @@ jobs:
needs:
- build
- build-binaries
+ outputs:
+ package-version: ${{ steps.set-package-version.outputs.package-version }}
steps:
- uses: actions/checkout@v4
with:
@@ -391,6 +393,12 @@
if [ -f .semanticRelease.npmPackage.deployedVersion.txt ]; then
echo "npm-url=https://www.npmjs.com/package/node-llama-cpp/v/$(cat .semanticRelease.npmPackage.deployedVersion.txt)" >> $GITHUB_OUTPUT
fi
+ - name: Set package version to GITHUB_OUTPUT
+ id: set-package-version
+ run: |
+ if [ -f .semanticRelease.npmPackage.deployedVersion.txt ]; then
+ echo "package-version=$(cat .semanticRelease.npmPackage.deployedVersion.txt)" >> $GITHUB_OUTPUT
+ fi
- name: Prepare `create-node-llama-cpp` module
if: steps.set-npm-url.outputs.npm-url != ''
run: |
@@ -437,3 +445,56 @@ jobs:
uses: actions/deploy-pages@v4
with:
artifact_name: pages-docs

+ build-electron-example:
+ name: Build & release Electron app example - ${{ matrix.config.name }}
+ needs:
+ - release
+ if: needs.release.outputs.package-version != ''
+ runs-on: ${{ matrix.config.os }}
+ permissions:
+ contents: write
+ strategy:
+ fail-fast: false
+ matrix:
+ config:
+ - name: "Windows"
+ os: windows-2022
+ - name: "Ubuntu"
+ os: ubuntu-22.04
+ - name: "macOS"
+ os: macos-13
+
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-node@v4
+ with:
+ node-version: "20"
+
+ - name: Install modules
+ run: npm ci
+
+ - name: Create Electron app project
+ env:
+ DEPLOYED_PACKAGE_VERSION: ${{ needs.release.outputs.package-version }}
+ run: |
+ npx --no vite-node ./scripts/scaffoldElectronExampleForCiBuild.ts --packageVersion "$DEPLOYED_PACKAGE_VERSION" --packageFolderPath ./electron-app-example
+ cd electron-app-example
+ npm install
+
+ - name: Build electron app
+ id: build
+ shell: bash
+ timeout-minutes: 480
+ run: |
+ cd electron-app-example
+ npm run build
+ ls ./release
+
+ - name: Add builds to current release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ file: "electron-app-example/release/*.{dmg,zip,exe,appx,AppImage,snap,deb,tar.gz}"
+ file_glob: true
+ tag: ${{ needs.release.outputs.package-version }}
+ make_latest: false
18 changes: 9 additions & 9 deletions llama/addon.cpp
@@ -222,9 +222,9 @@ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* mod
return Napi::Number::From(info.Env(), -1);
}

- auto tokenType = llama_token_get_type(model, token);
+ auto tokenAttributes = llama_token_get_attr(model, token);

- if (tokenType == LLAMA_TOKEN_TYPE_UNDEFINED || tokenType == LLAMA_TOKEN_TYPE_UNKNOWN) {
+ if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
return Napi::Number::From(info.Env(), -1);
}

@@ -236,9 +236,9 @@ static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_mod
return Napi::Number::From(info.Env(), -1);
}

- auto tokenType = llama_token_get_type(model, token);
+ auto tokenAttributes = llama_token_get_attr(model, token);

- if (tokenType != LLAMA_TOKEN_TYPE_CONTROL && tokenType != LLAMA_TOKEN_TYPE_USER_DEFINED) {
+ if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
return Napi::Number::From(info.Env(), -1);
}

@@ -535,20 +535,20 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
return Napi::String::New(info.Env(), ss.str());
}

- Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
+ Napi::Value GetTokenAttributes(const Napi::CallbackInfo& info) {
if (disposed) {
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
return info.Env().Undefined();
}

if (info[0].IsNumber() == false) {
- return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_TYPE_UNDEFINED));
+ return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_ATTR_UNDEFINED));
}

int token = info[0].As<Napi::Number>().Int32Value();
- auto tokenType = llama_token_get_type(model, token);
+ auto tokenAttributes = llama_token_get_attr(model, token);

- return Napi::Number::From(info.Env(), int32_t(tokenType));
+ return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
}
Napi::Value IsEogToken(const Napi::CallbackInfo& info) {
if (disposed) {
@@ -611,7 +611,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
InstanceMethod("suffixToken", &AddonModel::SuffixToken),
InstanceMethod("eotToken", &AddonModel::EotToken),
InstanceMethod("getTokenString", &AddonModel::GetTokenString),
InstanceMethod("getTokenType", &AddonModel::GetTokenType),
InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
InstanceMethod("isEogToken", &AddonModel::IsEogToken),
InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
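
A note on the llama/addon.cpp changes above: upstream llama.cpp replaced the single-valued token type enum (llama_token_get_type) with a token attributes bitmask (llama_token_get_attr), so a token can carry several attributes at once and checks become bitwise tests rather than equality comparisons. Below is a minimal TypeScript sketch of that pattern; the flag values mirror llama.cpp's llama_token_attr enum as I understand it at the time of this change, so verify against llama.h before relying on them:

// Flag values mirroring llama.cpp's llama_token_attr enum (verify against llama.h).
enum TokenAttribute {
    Undefined = 0,
    Unknown = 1 << 0,
    Unused = 1 << 1,
    Normal = 1 << 2,
    Control = 1 << 3,
    UserDefined = 1 << 4,
    Byte = 1 << 5
}

// Because attributes form a bitmask, membership is tested with bitwise AND
// rather than `===` against a single enum value.
function isControlLike(attributes: number): boolean {
    return (attributes & (TokenAttribute.Control | TokenAttribute.UserDefined)) !== 0;
}

console.log(isControlLike(TokenAttribute.Control | TokenAttribute.Byte)); // true
console.log(isControlLike(TokenAttribute.Normal)); // false

This is why the updated addon code tests flags with & instead of comparing a token type for equality, and why GetTokenType was renamed to GetTokenAttributes.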