From 98a1e1ca99898467ed040ae2b2f2cb5917059271 Mon Sep 17 00:00:00 2001
From: Finn Kumkar
Date: Thu, 20 Jun 2024 13:45:06 +0200
Subject: [PATCH] Use existing importance matrix files for all quant formats

---
 README.md                          |  5 ++++-
 imatrix/.gitignore                 |  3 +++
 quantize_weights_for_llama.cpp.ps1 | 10 ++++++----
 3 files changed, 13 insertions(+), 5 deletions(-)
 create mode 100644 imatrix/.gitignore

diff --git a/README.md b/README.md
index f66ca6c..9f02e86 100644
--- a/README.md
+++ b/README.md
@@ -53,6 +53,9 @@ TARGET_DIRECTORY=.\gguf
 # physical drive to improve the quantization speed.
 CACHE_DIRECTORY=.\cache
 
+# Path to the directory for importance matrix files.
+IMPORTANCE_MATRIX_DIRECTORY=.\imatrix
+
 #
 # Comma separated list of quantization types.
 #
@@ -106,7 +109,7 @@ QUANTIZATION_TYPES=Q5_K_M,Q3_K_S
 Clone a Git repository containing an LLM into the `SOURCE_DIRECTORY` without checking out any files and downloading any large files (lfs).
 
 ```PowerShell
-git -C "./source" clone --no-checkout https://huggingface.co/openchat/openchat-3.5-0106
+git -C "./source" clone --no-checkout https://huggingface.co/openchat/openchat-3.6-8b-20240522
 ```
 
 ### 2. Download model sources
diff --git a/imatrix/.gitignore b/imatrix/.gitignore
new file mode 100644
index 0000000..73943c9
--- /dev/null
+++ b/imatrix/.gitignore
@@ -0,0 +1,3 @@
+# Ignore everything in this directory except this file.
+*
+!.gitignore
diff --git a/quantize_weights_for_llama.cpp.ps1 b/quantize_weights_for_llama.cpp.ps1
index 84f891b..9edc79e 100644
--- a/quantize_weights_for_llama.cpp.ps1
+++ b/quantize_weights_for_llama.cpp.ps1
@@ -14,6 +14,7 @@ Get-Content "./.env" | ForEach {
 $llamaCppDirectory = Resolve-Path -Path $env:LLAMA_CPP_DIRECTORY
 $sourceDirectory = Resolve-Path -Path $env:SOURCE_DIRECTORY
 $targetDirectory = Resolve-Path -Path $env:TARGET_DIRECTORY
+$importanceMatrixDirectory = Resolve-Path -Path $env:IMPORTANCE_MATRIX_DIRECTORY
 $cacheDirectory = Resolve-Path -Path $env:CACHE_DIRECTORY
 $trainingDataPath = Resolve-Path -Path $env:TRAINING_DATA
 $cleanCache = [System.Convert]::ToBoolean($env:CLEAN_CACHE)
@@ -42,7 +43,7 @@ ForEach ($repositoryName in $repositoryDirectories) {
 
     # Note that we are not removing *.importance-matrix.dat files because
     # they are relatively small but take a _very_ long time to compute.
-    $importanceMatrixPath = Join-Path -Path $targetDirectoryPath -ChildPath "${repositoryName}.importance-matrix.dat"
+    $importanceMatrixPath = Join-Path -Path $importanceMatrixDirectory -ChildPath "${repositoryName}.importance-matrix.dat"
 
     # If a repository already contains an unquantized GGUF file we are using it directly.
     $unquantizedModelPathFromSource = Join-Path -Path $sourceDirectory -ChildPath $repositoryName | Join-Path -ChildPath "${repositoryName}.gguf"
@@ -64,8 +65,8 @@ ForEach ($repositoryName in $repositoryDirectories) {
         Invoke-Expression "$convertCommand --outfile `"${unquantizedModelPath}`" `"${sourceDirectoryPath}`""
     }
 
-    # We need to compute an importance matrix for all i-quants and
-    # small k-quants to enhance the quality of the quantum models.
+    # We need to compute an importance matrix for all i-quants
+    # and small k-quants to enhance the quality of the models.
     # https://github.com/ggerganov/llama.cpp/tree/master/examples/imatrix
     $requiresImportanceMatrix = $type.Contains("IQ") -or "Q2_K Q2_K_S".Contains($type)
 
@@ -84,7 +85,8 @@ ForEach ($repositoryName in $repositoryDirectories) {
 
         $quantizeCommand = "${llamaCppDirectory}\build\bin\Release\llama-quantize.exe"
 
-        if ($requiresImportanceMatrix) {
+        # If an importance matrix file is available we are using it.
+        if (Test-Path -Path $importanceMatrixPath) {
             $quantizeCommand = "${quantizeCommand} --imatrix `"${importanceMatrixPath}`""
         }
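Usage note (not part of the patch itself): with this change, `llama-quantize` picks up an importance matrix for every quantization type whenever a matching `<repositoryName>.importance-matrix.dat` file exists in `IMPORTANCE_MATRIX_DIRECTORY`, not only for the i-quants and small k-quants that strictly require one. Below is a minimal sketch of how that directory could be pre-seeded by computing the matrix once with llama.cpp's `llama-imatrix` tool; the model name, calibration file, and directory layout are illustrative assumptions, not values taken from the repository.

```PowerShell
# Sketch: compute an importance matrix once and drop it into the imatrix
# directory so the quantization script reuses it on subsequent runs.
# All paths and the model name are assumptions for illustration.
$repositoryName = "openchat-3.6-8b-20240522"
$unquantizedModel = ".\cache\${repositoryName}\${repositoryName}.gguf"
$importanceMatrix = ".\imatrix\${repositoryName}.importance-matrix.dat"

# llama-imatrix reads the unquantized model (-m) and a calibration text (-f)
# and writes the matrix to the output file (-o). This step is slow, but it
# only needs to run once per model.
& ".\llama.cpp\build\bin\Release\llama-imatrix.exe" `
    -m $unquantizedModel `
    -f ".\training-data.txt" `
    -o $importanceMatrix

# The quantization script then finds the file via Test-Path and appends
# --imatrix to the llama-quantize call for every requested quant type.
Test-Path -Path $importanceMatrix   # expected: True
```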