ggerganov · phymbert · Apr 26, 2024 · Apr 13, 2024 · Apr 13, 2024 · Apr 13, 2024
diff --git a/Makefile b/Makefile
@@ -760,7 +760,7 @@ batched-bench: examples/batched-bench/batched-bench.cpp       build-info.o ggml.
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
-quantize: examples/quantize/quantize.cpp                      build-info.o ggml.o llama.o $(OBJS)
+quantize: examples/quantize/quantize.cpp                      ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 

diff --git a/common/common.cpp b/common/common.cpp
@@ -157,6 +157,48 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
     return result;
 }
 
+bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
+    const char* sep = strchr(data, '=');
+    if (sep == nullptr || sep - data >= 128) {
+        fprintf(stderr, "%s: malformed KV override '%s'\n", __func__, data);
+        return false;
+    }
+    llama_model_kv_override kvo;
+    std::strncpy(kvo.key, data, sep - data);
+    kvo.key[sep - data] = 0;
+    sep++;
+    if (strncmp(sep, "int:", 4) == 0) {
+        sep += 4;
+        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
+        kvo.int_value = std::atol(sep);
+    } else if (strncmp(sep, "float:", 6) == 0) {
+        sep += 6;
+        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
+        kvo.float_value = std::atof(sep);
+    } else if (strncmp(sep, "bool:", 5) == 0) {
+        sep += 5;
+        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
+        if (std::strcmp(sep, "true") == 0) {
+            kvo.bool_value = true;
+        } else if (std::strcmp(sep, "false") == 0) {
+            kvo.bool_value = false;
+        } else {
+            fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data);
+            return false;
+        }
+
+    } else if (strncmp(sep, "str:", 4) == 0) {
+        sep += 4;
+        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
+        kvo.str_value = strdup(sep);
+    } else {
+        fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data);
+        return false;
+    }
+    overrides.emplace_back(std::move(kvo));
+    return true;
+}
+
 bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param) {
     llama_sampling_params& sparams = params.sparams;
 
@@ -1153,47 +1195,11 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             invalid_param = true;
             return true;
         }
-        char* sep = strchr(argv[i], '=');
-        if (sep == nullptr || sep - argv[i] >= 128) {
-            fprintf(stderr, "error: Malformed KV override: %s\n", argv[i]);
-            invalid_param = true;
-            return true;
-        }
-        struct llama_model_kv_override kvo;
-        std::strncpy(kvo.key, argv[i], sep - argv[i]);
-        kvo.key[sep - argv[i]] = 0;
-        sep++;
-        if (strncmp(sep, "int:", 4) == 0) {
-            sep += 4;
-            kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
-            kvo.int_value = std::atol(sep);
-        }
-        else if (strncmp(sep, "float:", 6) == 0) {
-            sep += 6;
-            kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
-            kvo.float_value = std::atof(sep);
-        }
-        else if (strncmp(sep, "bool:", 5) == 0) {
-            sep += 5;
-            kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
-            if (std::strcmp(sep, "true") == 0) {
-                kvo.bool_value = true;
-            }
-            else if (std::strcmp(sep, "false") == 0) {
-                kvo.bool_value = false;
-            }
-            else {
-                fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
-                invalid_param = true;
-                return true;
-            }
-        }
-        else {
+        if (!parse_kv_override(argv[i], params.kv_overrides)) {
             fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
             invalid_param = true;
             return true;
         }
-        params.kv_overrides.push_back(kvo);
         return true;
     }
 #ifndef LOG_DISABLE_LOGS
@@ -1461,7 +1467,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("                        path to dynamic lookup cache to use for lookup decoding (updated by generation)\n");
     printf("  --override-kv KEY=TYPE:VALUE\n");
     printf("                        advanced option to override model metadata by key. may be specified multiple times.\n");
-    printf("                        types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
+    printf("                        types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
     printf("  -ptc N, --print-token-count N\n");
     printf("                        print token count every N tokens (default: %d)\n", params.n_print);
     printf("\n");

diff --git a/common/common.h b/common/common.h
@@ -169,6 +169,8 @@ struct gpt_params {
     std::string image  = ""; // path to an image file
 };
 
+bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
+
 bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params);
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
@@ -23,6 +23,7 @@ struct Stats {
 };
 
 struct StatParams {
+    std::string dataset;
     std::string ofile = "imatrix.dat";
     int         n_output_frequency = 10;
     int         verbosity = 1;
@@ -46,7 +47,7 @@ class IMatrixCollector {
     std::vector<float>                     m_src1_data;
     std::vector<int>                       m_ids; // the expert ids from ggml_mul_mat_id
                                                   //
-    void save_imatrix(const char * file_name) const;
+    void save_imatrix(const char * file_name, const char * dataset) const;
     void keep_imatrix(int ncall) const;
 };
 
@@ -184,32 +185,38 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
 }
 
 void IMatrixCollector::save_imatrix() const {
-    save_imatrix(m_params.ofile.empty() ? "imatrix.dat" : m_params.ofile.c_str());
+    save_imatrix(m_params.ofile.empty() ? "imatrix.dat" : m_params.ofile.c_str(), m_params.dataset.c_str());
 }
 
 void IMatrixCollector::keep_imatrix(int ncall) const {
     auto file_name = m_params.ofile;
     if (file_name.empty()) file_name = "imatrix.dat";
     file_name += ".at_";
     file_name += std::to_string(ncall);
-    save_imatrix(file_name.c_str());
+    save_imatrix(file_name.c_str(), m_params.dataset.c_str());
 }
 
-void IMatrixCollector::save_imatrix(const char * fname) const {
+void IMatrixCollector::save_imatrix(const char * fname, const char * dataset) const {
     std::ofstream out(fname, std::ios::binary);
     int n_entries = m_stats.size();
-    out.write((const char*)&n_entries, sizeof(n_entries));
-    for (auto& p : m_stats) {
+    out.write((const char *) &n_entries, sizeof(n_entries));
+    for (const auto & p : m_stats) {
         int len = p.first.size();
-        out.write((const char*)&len, sizeof(len));
+        out.write((const char *) &len, sizeof(len));
         out.write(p.first.c_str(), len);
-        out.write((const char*)&p.second.ncall, sizeof(p.second.ncall));
+        out.write((const char *) &p.second.ncall, sizeof(p.second.ncall));
         int nval = p.second.values.size();
-        out.write((const char*)&nval, sizeof(nval));
-        if (nval > 0) out.write((const char*)p.second.values.data(), nval*sizeof(float));
+        out.write((const char *) &nval, sizeof(nval));
+        if (nval > 0) out.write((const char *) p.second.values.data(), nval * sizeof(float));
     }
+
+    // Write the dataset name at the end of the file to later on specify it in quantize
+    int n_dataset = strlen(dataset);
+    out.write((const char *) &n_dataset, sizeof(n_dataset));
+    out.write(dataset, n_dataset);
+
     if (m_params.verbosity > 0) {
-        fprintf(stderr, "\n%s: stored collected data after %d chunks in %s\n",__func__,m_last_call,fname);
+        fprintf(stderr, "\n%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname);
     }
 }
 
@@ -532,6 +539,29 @@ int main(int argc, char ** argv) {
         }
     }
 
+    gpt_params params;
+    params.n_batch = 512;
+    if (!gpt_params_parse(args.size(), args.data(), params)) {
+        return 1;
+    }
+
+    params.logits_all = true;
+    params.n_batch = std::min(params.n_batch, params.n_ctx);
+
+    print_build_info();
+
+    if (params.seed == LLAMA_DEFAULT_SEED) {
+        params.seed = time(NULL);
+    }
+
+    fprintf(stderr, "%s: seed  = %u\n", __func__, params.seed);
+
+    std::mt19937 rng(params.seed);
+    if (params.random_prompt) {
+        params.prompt = gpt_random_prompt(rng);
+    }
+
+    sparams.dataset = params.prompt_file;
     g_collector.set_parameters(std::move(sparams));
 
     if (!combine_files.empty()) {
@@ -570,28 +600,6 @@ int main(int argc, char ** argv) {
         }
     }
 
-    gpt_params params;
-    params.n_batch = 512;
-    if (!gpt_params_parse(args.size(), args.data(), params)) {
-        return 1;
-    }
-
-    params.logits_all = true;
-    params.n_batch = std::min(params.n_batch, params.n_ctx);
-
-    print_build_info();
-
-    if (params.seed == LLAMA_DEFAULT_SEED) {
-        params.seed = time(NULL);
-    }
-
-    fprintf(stderr, "%s: seed  = %u\n", __func__, params.seed);
-
-    std::mt19937 rng(params.seed);
-    if (params.random_prompt) {
-        params.prompt = gpt_random_prompt(rng);
-    }
-
     llama_backend_init();
     llama_numa_init(params.numa);
 

diff --git a/examples/quantize/CMakeLists.txt b/examples/quantize/CMakeLists.txt
@@ -1,6 +1,6 @@
 set(TARGET quantize)
 add_executable(${TARGET} quantize.cpp)
 install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama build_info ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT})
 target_include_directories(${TARGET} PRIVATE ../../common)
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
@@ -8,7 +8,6 @@
 #include <unordered_map>
 #include <fstream>
 #include <cmath>
-#include <algorithm>
 
 struct quant_option {
     std::string name;
@@ -53,6 +52,8 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "COPY",   LLAMA_FTYPE_ALL_F32,       "only copy tensors, no quantizing", },
 };
 
+static const char * const LLM_KV_QUANTIZE_IMATRIX_DATASET    = "quantize.imatrix.dataset";
+static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES  = "quantize.imatrix.n_entries";
 
 static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) {
     std::string ftype_str;
@@ -112,7 +113,7 @@ static void usage(const char * executable) {
     exit(1);
 }
 
-static void load_imatrix(const std::string & imatrix_file, std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
+static void load_imatrix(const std::string & imatrix_file, std::string & imatrix_dataset, std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
     std::ifstream in(imatrix_file.c_str(), std::ios::binary);
     if (!in) {
         printf("%s: failed to open %s\n",__func__, imatrix_file.c_str());
@@ -159,15 +160,27 @@ static void load_imatrix(const std::string & imatrix_file, std::unordered_map<st
             printf("%s: loaded data (size = %6d, ncall = %6d) for '%s'\n", __func__, int(e.size()), ncall, name.c_str());
         }
     }
+
+    // latest imatrix version contains the dataset filename at the end of the file
+    if (in.peek() != EOF) {
+        int dataset_len;
+        in.read((char *)&dataset_len, sizeof(dataset_len));
+        std::vector<char> dataset_as_vec(dataset_len+1);
+        in.read((char *)dataset_as_vec.data(), dataset_len);
+        dataset_as_vec[dataset_len] = 0;
+        imatrix_dataset = std::string{dataset_as_vec.data()};
+        printf("%s: imatrix dataset='%s'\n", __func__, imatrix_dataset.c_str());
+    }
     printf("%s: loaded %d importance matrix entries from %s\n", __func__, int(imatrix_data.size()), imatrix_file.c_str());
 }
 
 static void prepare_imatrix(const std::string & imatrix_file,
+        std::string & imatrix_dataset,
         const std::vector<std::string> & included_weights,
         const std::vector<std::string> & excluded_weights,
         std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
     if (!imatrix_file.empty()) {
-        load_imatrix(imatrix_file, imatrix_data);
+        load_imatrix(imatrix_file, imatrix_dataset, imatrix_data);
     }
     if (imatrix_data.empty()) {
         return;
@@ -210,43 +223,6 @@ static ggml_type parse_ggml_type(const char * arg) {
     return result;
 }
 
-static bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
-    const char* sep = strchr(data, '=');
-    if (sep == nullptr || sep - data >= 128) {
-        fprintf(stderr, "%s: malformed KV override '%s'\n", __func__, data);
-        return false;
-    }
-    llama_model_kv_override kvo;
-    std::strncpy(kvo.key, data, sep - data);
-    kvo.key[sep - data] = 0;
-    sep++;
-    if (strncmp(sep, "int:", 4) == 0) {
-        sep += 4;
-        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
-        kvo.int_value = std::atol(sep);
-    } else if (strncmp(sep, "float:", 6) == 0) {
-        sep += 6;
-        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
-        kvo.float_value = std::atof(sep);
-    } else if (strncmp(sep, "bool:", 5) == 0) {
-        sep += 5;
-        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
-        if (std::strcmp(sep, "true") == 0) {
-            kvo.bool_value = true;
-        } else if (std::strcmp(sep, "false") == 0) {
-            kvo.bool_value = false;
-        } else {
-            fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data);
-            return false;
-        }
-    } else {
-        fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data);
-        return false;
-    }
-    overrides.emplace_back(std::move(kvo));
-    return true;
-}
-
 int main(int argc, char ** argv) {
     if (argc < 3) {
         usage(argv[0]);
@@ -313,10 +289,26 @@ int main(int argc, char ** argv) {
         usage(argv[0]);
     }
 
+    std::string imatrix_dataset;
     std::unordered_map<std::string, std::vector<float>> imatrix_data;
-    prepare_imatrix(imatrix_file, included_weights, excluded_weights, imatrix_data);
+    prepare_imatrix(imatrix_file, imatrix_dataset, included_weights, excluded_weights, imatrix_data);
     if (!imatrix_data.empty()) {
         params.imatrix = &imatrix_data;
+        if (!imatrix_dataset.empty()) {
+            llama_model_kv_override kvo;
+            std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_DATASET);
+            kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
+            kvo.str_value = strdup(imatrix_dataset.c_str());
+            kv_overrides.emplace_back(std::move(kvo));
+        }
+
+        {
+            llama_model_kv_override kvo;
+            std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES);
+            kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
+            kvo.int_value = imatrix_data.size();
+            kv_overrides.emplace_back(std::move(kvo));
+        }
     }
     if (!kv_overrides.empty()) {
         kv_overrides.emplace_back();