From 98a1e1ca99898467ed040ae2b2f2cb5917059271 Mon Sep 17 00:00:00 2001
From: Finn Kumkar
Date: Thu, 20 Jun 2024 13:45:06 +0200
Subject: [PATCH] Use existing importance matrix files for all quant formats

---
 README.md                          |  5 ++++-
 imatrix/.gitignore                 |  3 +++
 quantize_weights_for_llama.cpp.ps1 | 10 ++++++----
 3 files changed, 13 insertions(+), 5 deletions(-)
 create mode 100644 imatrix/.gitignore

diff --git a/README.md b/README.md
index f66ca6c..9f02e86 100644
--- a/README.md
+++ b/README.md
@@ -53,6 +53,9 @@ TARGET_DIRECTORY=.\gguf
 # physical drive to improve the quantization speed.
 CACHE_DIRECTORY=.\cache
 
+# Path to the directory for importance matrix files.
+IMPORTANCE_MATRIX_DIRECTORY=.\imatrix
+
 #
 # Comma separated list of quantization types.
 #
@@ -106,7 +109,7 @@ QUANTIZATION_TYPES=Q5_K_M,Q3_K_S
 Clone a Git repository containing an LLM into the `SOURCE_DIRECTORY` without checking out any files and downloading any large files (lfs).
 
 ```PowerShell
-git -C "./source" clone --no-checkout https://huggingface.co/openchat/openchat-3.5-0106
+git -C "./source" clone --no-checkout https://huggingface.co/openchat/openchat-3.6-8b-20240522
 ```
 
 ### 2. Download model sources
diff --git a/imatrix/.gitignore b/imatrix/.gitignore
new file mode 100644
index 0000000..73943c9
--- /dev/null
+++ b/imatrix/.gitignore
@@ -0,0 +1,3 @@
+# Ignore everything in this directory except this file.
+*
+!.gitignore
diff --git a/quantize_weights_for_llama.cpp.ps1 b/quantize_weights_for_llama.cpp.ps1
index 84f891b..9edc79e 100644
--- a/quantize_weights_for_llama.cpp.ps1
+++ b/quantize_weights_for_llama.cpp.ps1
@@ -14,6 +14,7 @@ Get-Content "./.env" | ForEach {
 $llamaCppDirectory = Resolve-Path -Path $env:LLAMA_CPP_DIRECTORY
 $sourceDirectory = Resolve-Path -Path $env:SOURCE_DIRECTORY
 $targetDirectory = Resolve-Path -Path $env:TARGET_DIRECTORY
+$importanceMatrixDirectory = Resolve-Path -Path $env:IMPORTANCE_MATRIX_DIRECTORY
 $cacheDirectory = Resolve-Path -Path $env:CACHE_DIRECTORY
 $trainingDataPath = Resolve-Path -Path $env:TRAINING_DATA
 $cleanCache = [System.Convert]::ToBoolean($env:CLEAN_CACHE)
@@ -42,7 +43,7 @@ ForEach ($repositoryName in $repositoryDirectories) {
 
     # Note that we are not removing *.importance-matrix.dat files because
     # they are relatively small but take a _very_ long time to compute.
-    $importanceMatrixPath = Join-Path -Path $targetDirectoryPath -ChildPath "${repositoryName}.importance-matrix.dat"
+    $importanceMatrixPath = Join-Path -Path $importanceMatrixDirectory -ChildPath "${repositoryName}.importance-matrix.dat"
 
     # If a repository already contains an unquantized GGUF file we are using it directly.
     $unquantizedModelPathFromSource = Join-Path -Path $sourceDirectory -ChildPath $repositoryName | Join-Path -ChildPath "${repositoryName}.gguf"
@@ -64,8 +65,8 @@ ForEach ($repositoryName in $repositoryDirectories) {
         Invoke-Expression "$convertCommand --outfile `"${unquantizedModelPath}`" `"${sourceDirectoryPath}`""
     }
 
-    # We need to compute an importance matrix for all i-quants and
-    # small k-quants to enhance the quality of the quantum models.
+    # We need to compute an importance matrix for all i-quants
+    # and small k-quants to enhance the quality of the models.
     # https://github.com/ggerganov/llama.cpp/tree/master/examples/imatrix
     $requiresImportanceMatrix = $type.Contains("IQ") -or "Q2_K Q2_K_S".Contains($type)
 
@@ -84,7 +85,8 @@ ForEach ($repositoryName in $repositoryDirectories) {
 
         $quantizeCommand = "${llamaCppDirectory}\build\bin\Release\llama-quantize.exe"
 
-        if ($requiresImportanceMatrix) {
+        # If an importance matrix file is available we are using it.
+        if (Test-Path -Path $importanceMatrixPath) {
             $quantizeCommand = "${quantizeCommand} --imatrix `"${importanceMatrixPath}`""
         }
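Usage note (not part of the patch itself): with this change, `llama-quantize` picks up an importance matrix for every quantization type whenever a matching `<repositoryName>.importance-matrix.dat` file exists in `IMPORTANCE_MATRIX_DIRECTORY`, not only for the i-quants and small k-quants that strictly require one. Below is a minimal sketch of how that directory could be pre-seeded by computing the matrix once with llama.cpp's `llama-imatrix` tool; the model name, calibration file, and directory layout are illustrative assumptions, not values taken from the repository.

```PowerShell
# Sketch: compute an importance matrix once and drop it into the imatrix
# directory so the quantization script reuses it on subsequent runs.
# All paths and the model name are assumptions for illustration.
$repositoryName = "openchat-3.6-8b-20240522"
$unquantizedModel = ".\cache\${repositoryName}\${repositoryName}.gguf"
$importanceMatrix = ".\imatrix\${repositoryName}.importance-matrix.dat"

# llama-imatrix reads the unquantized model (-m) and a calibration text (-f)
# and writes the matrix to the output file (-o). This step is slow, but it
# only needs to run once per model.
& ".\llama.cpp\build\bin\Release\llama-imatrix.exe" `
    -m $unquantizedModel `
    -f ".\training-data.txt" `
    -o $importanceMatrix

# The quantization script then finds the file via Test-Path and appends
# --imatrix to the llama-quantize call for every requested quant type.
Test-Path -Path $importanceMatrix   # expected: True
```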