Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gguf-py: Refactor and allow reading/modifying existing GGUF files #3981

Merged
merged 33 commits into from
Nov 11, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
b8c80df
gguf-py: Refactor and add file reading support
KerfuffleV2 Nov 7, 2023
8047aa1
Replay changes from #3871
KerfuffleV2 Nov 7, 2023
d7688dc
Various type annotation fixes.
KerfuffleV2 Nov 8, 2023
a6f5742
sort imports with isort (again)
cebtenzzre Nov 8, 2023
ce865b3
Fix missing return statement in add_tensor
KerfuffleV2 Nov 8, 2023
f364636
style cleanup with flake8
cebtenzzre Nov 8, 2023
f2292fc
fix NamedTuple and Enum usage
cebtenzzre Nov 8, 2023
fffdac3
Fix an issue with state init in GGUFReader
KerfuffleV2 Nov 8, 2023
b56ed66
Damagage is not a word.
KerfuffleV2 Nov 8, 2023
4a5cd69
Clean up gguf-py/examples/modify_gguf.py whitespace
KerfuffleV2 Nov 9, 2023
2af29ff
Update gguf-py/examples/modify_gguf.py formatting
KerfuffleV2 Nov 9, 2023
855486c
Update gguf-py/gguf/gguf_reader.py type hint
KerfuffleV2 Nov 9, 2023
2360aaa
Make examples executable, formatting changes
KerfuffleV2 Nov 9, 2023
8e250fe
Add more information to GGUFReader and examples comments
KerfuffleV2 Nov 9, 2023
0d0306e
Include a gguf Python package version bump
KerfuffleV2 Nov 9, 2023
cc58ad0
Merge branch 'master' into feat-gguf-py-read-refactor
KerfuffleV2 Nov 9, 2023
bca0962
Add convert-gguf-endian.py script
KerfuffleV2 Nov 9, 2023
233cb07
cleanup
cebtenzzre Nov 9, 2023
5738b2f
gguf-py : bump minor version
cebtenzzre Nov 9, 2023
52bdc7e
Reorganize scripts
KerfuffleV2 Nov 9, 2023
a04f048
Make GGUFReader endian detection less arbitrary
KerfuffleV2 Nov 9, 2023
bd241db
Add JSON dumping support to gguf-dump.py
KerfuffleV2 Nov 9, 2023
382f975
A few for gguf-dump.py cleanups
KerfuffleV2 Nov 10, 2023
7d3580d
Murder accidental tuple in gguf-py/scripts/gguf-dump.py
KerfuffleV2 Nov 10, 2023
5608cd8
cleanup
cebtenzzre Nov 10, 2023
795dc0f
constants : remove unneeded type annotations
cebtenzzre Nov 10, 2023
a21e9e7
fix python 3.8 compat
cebtenzzre Nov 10, 2023
eff662d
Set up gguf- scripts in pyproject.toml
KerfuffleV2 Nov 10, 2023
0b0e726
And include scripts/__init__.py, derp
KerfuffleV2 Nov 10, 2023
960f912
convert.py: We can't currently support Q8_0 on big endian.
KerfuffleV2 Nov 10, 2023
9ce51b6
gguf-py: SpecialVocab: Always try available sources for special token…
KerfuffleV2 Nov 10, 2023
f22b2f2
cleanup
cebtenzzre Nov 10, 2023
4814b4b
Promote add_X_token to GGUF metadata for BOS and EOS
KerfuffleV2 Nov 10, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion convert-baichuan-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from sentencepiece import SentencePieceProcessor # type: ignore[import]

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


Expand Down
2 changes: 1 addition & 1 deletion convert-bloom-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from transformers import AutoTokenizer # type: ignore[import]

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


Expand Down
2 changes: 1 addition & 1 deletion convert-falcon-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from transformers import AutoTokenizer # type: ignore[import]

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


Expand Down
2 changes: 1 addition & 1 deletion convert-gptneox-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from transformers import AutoTokenizer # type: ignore[import]

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


Expand Down
24 changes: 2 additions & 22 deletions convert-llama-ggml-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,9 @@

import os
if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf

# Note: Does not support GGML_QKK_64
QK_K = 256
# Items here are (block size, type size)
GGML_QUANT_SIZES = {
gguf.GGMLQuantizationType.F32 : (1, 4),
gguf.GGMLQuantizationType.F16 : (1, 2),
gguf.GGMLQuantizationType.Q4_0 : (32, 2 + 16),
gguf.GGMLQuantizationType.Q4_1 : (32, 2 + 2 + 16),
gguf.GGMLQuantizationType.Q5_0 : (32, 2 + 4 + 16),
gguf.GGMLQuantizationType.Q5_1 : (32, 2 + 2 + 4 + 16),
gguf.GGMLQuantizationType.Q8_0 : (32, 2 + 32),
gguf.GGMLQuantizationType.Q8_1 : (32, 4 + 4 + 32),
gguf.GGMLQuantizationType.Q2_K : (256, 2 + 2 + QK_K // 16 + QK_K // 4),
gguf.GGMLQuantizationType.Q3_K : (256, 2 + QK_K // 4 + QK_K // 8 + 12),
gguf.GGMLQuantizationType.Q4_K : (256, 2 + 2 + QK_K // 2 + 12),
gguf.GGMLQuantizationType.Q5_K : (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
gguf.GGMLQuantizationType.Q6_K : (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
gguf.GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8),
}

class GGMLFormat(IntEnum):
GGML = 0
GGMF = 1
Expand Down Expand Up @@ -125,7 +105,7 @@ def load(self, data, offset):
(n_dims, name_len, dtype) = struct.unpack('<3I', data[offset:offset + 12])
assert n_dims >= 0 and n_dims <= 4, f'Invalid tensor dimensions {n_dims}'
assert name_len < 4096, 'Absurd tensor name length'
quant = GGML_QUANT_SIZES.get(dtype)
quant = gguf.GGML_QUANT_SIZES.get(dtype)
assert quant is not None, 'Unknown tensor type'
(blksize, tysize) = quant
offset += 12
Expand Down
2 changes: 1 addition & 1 deletion convert-mpt-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from transformers import AutoTokenizer # type: ignore[import]

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


Expand Down
2 changes: 1 addition & 1 deletion convert-persimmon-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
from sentencepiece import SentencePieceProcessor
if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf

def _flatten_dict(dct, tensors, prefix=None):
Expand Down
2 changes: 1 addition & 1 deletion convert-starcoder-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from transformers import AutoTokenizer # type: ignore[import]

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


Expand Down
10 changes: 4 additions & 6 deletions convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@

import argparse
import concurrent.futures
import copy
import enum
import faulthandler
import functools
import io
import itertools
import json
import math
Expand All @@ -23,14 +21,14 @@
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path
from typing import IO, TYPE_CHECKING, Any, Callable, Generator, Iterable, Literal, Sequence, TypeVar
from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, TypeVar

import numpy as np
from sentencepiece import SentencePieceProcessor # type: ignore[import]

import os
if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf

if TYPE_CHECKING:
Expand Down Expand Up @@ -851,7 +849,7 @@ def add_meta_vocab(self, vocab: Vocab) -> None:
elif isinstance(vocab, BpeVocab):
self.gguf.add_tokenizer_model("gpt2")
else:
raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab')
raise ValueError('Unknown vocab type: Not BpeVocab or SentencePieceVocab')
self.gguf.add_token_list(tokens)
self.gguf.add_token_scores(scores)
self.gguf.add_token_types(toktypes)
Expand Down Expand Up @@ -905,7 +903,7 @@ def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
return dt.quantize(arr)

@staticmethod
def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None:
def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
check_vocab_size(params, vocab)

of = OutputFile(fname_out, endianess=endianess)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pathlib import Path

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / '..' / '..' / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / '..' / '..' / 'gguf-py'))
import gguf

# gguf constants
Expand Down
8 changes: 8 additions & 0 deletions gguf-py/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ as an example for its usage.
pip install gguf
```

## API Examples

[examples/writer.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/writer.py) — Generates `example.gguf` in the current directory to demonstrate generating a GGUF file. Note that this file cannot be used as a model.

[examples/dump_gguf.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/dump_gguf.py) — Dumps a GGUF file's metadata to the console.

[examples/modify_gguf.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/modify_gguf.py) — Allows changing simple metadata values in a GGUF file by key.

## Development
Maintainers who participate in development of this package are advised to install it in editable mode:

Expand Down
41 changes: 41 additions & 0 deletions gguf-py/examples/dump_gguf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python3
import sys
from pathlib import Path

# Necessary to load the local gguf package
sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFReader, GGUFValueType # noqa: E402

def dump_gguf(filename: str) -> None:
    """Dump a GGUF file's key/value metadata and tensor overview to stdout.

    Opens *filename* read-only with GGUFReader and prints one line per
    metadata field followed by one line per tensor.
    """
    # Bug fix: the original printed the literal placeholder text instead of
    # the actual file name (f-string had no {filename} substitution).
    print(f'* Loading: {filename}')
    reader = GGUFReader(filename, 'r')

    print(f'\n* Dumping {len(reader.fields)} key/value pair(s)')
    for n, field in enumerate(reader.fields.values(), 1):
        if not field.types:
            pretty_type = 'N/A'
        elif field.types[0] == GGUFValueType.ARRAY:
            # Arrays may be nested: render e.g. [[UINT32]] for array-of-array,
            # with the innermost element type in the middle.
            nest_count = len(field.types) - 1
            pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
        else:
            pretty_type = str(field.types[-1].name)
        print(f'  {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}', end = '')
        if len(field.types) == 1:
            curr_type = field.types[0]
            if curr_type == GGUFValueType.STRING:
                # Show at most 60 characters of string values.
                print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '')
            elif field.types[0] in reader.gguf_scalar_to_np:
                print(' = {0}'.format(field.parts[-1][0]), end = '')
        print()

    print(f'\n* Dumping {len(reader.tensors)} tensor(s)')
    for n, tensor in enumerate(reader.tensors, 1):
        # Pad the shape out to 4 dimensions so the columns line up.
        prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
        print(f'  {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}')


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('dump_gguf: Error: Specify an input file', file = sys.stderr)
        sys.exit(1)
    dump_gguf(sys.argv[1])
41 changes: 41 additions & 0 deletions gguf-py/examples/modify_gguf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python3
import sys
from pathlib import Path

# Necessary to load the local gguf package
sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFReader # noqa: E402
KerfuffleV2 marked this conversation as resolved.
Show resolved Hide resolved

def change_gguf(reader: GGUFReader, key: str, value: str) -> None:
    """Interactively change one scalar metadata field in an open GGUF file.

    Exits the process rather than raising: status 1 when the key is missing
    or not a supported scalar type, status 0 when the value already matches
    or the user declines the confirmation prompt.
    """
    target = reader.get_field(key)
    if target is None:
        print(f'! Field {repr(key)} not found', file = sys.stderr)
        sys.exit(1)

    # Only plain scalar types (those with a numpy mapping) can be rewritten
    # in place; arrays and strings are not supported.
    converter = None
    if target.types:
        converter = reader.gguf_scalar_to_np.get(target.types[0])
    if converter is None:
        print(f'! Field {repr(key)} has unsupported type: {target.types}')
        sys.exit(1)

    value_index = target.data[0]
    current_value = target.parts[value_index][0]
    new_value = converter(value)
    print(f'* Preparing to change field {repr(key)} from {current_value} to {new_value}')
    if current_value == new_value:
        print(f'- Key {repr(key)} already set to requested value {current_value}')
        sys.exit(0)

    # Mutating a GGUF file in place is risky — require explicit consent.
    print('*** Warning *** Warning *** Warning **')
    print('* Changing fields in a GGUF file can damage it. If you are positive then type YES:')
    if input('YES, I am sure> ') != 'YES':
        print("You didn't enter YES. Okay then, see ya!")
        sys.exit(0)

    target.parts[value_index][0] = new_value
    print('* Field changed. Successful completion.')


if __name__ == '__main__':
    if len(sys.argv) < 4:
        print('modify_gguf: Error: Missing arguments. Syntax: modify_gguf.py <filename> <key> <value>', file = sys.stderr)
        sys.exit(1)
    print(f'* Loading: {sys.argv[1]}')
    reader = GGUFReader(sys.argv[1], 'r+')
    change_gguf(reader, sys.argv[2], sys.argv[3])
37 changes: 37 additions & 0 deletions gguf-py/examples/writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env python3
import sys
from pathlib import Path

import numpy as np

# Necessary to load the local gguf package
sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFWriter # noqa: E402

# Example usage:
def writer_example() -> None:
    """Write a small demonstration GGUF file (example.gguf) to the current directory.

    The output is structurally valid GGUF but is not a usable model.
    """
    gguf_writer = GGUFWriter("example.gguf", "llama")

    # Metadata: architecture, block count, two custom KV entries, alignment.
    gguf_writer.add_architecture()
    gguf_writer.add_block_count(12)
    gguf_writer.add_uint32("answer", 42)              # a 32-bit integer
    gguf_writer.add_float32("answer_in_float", 42.0)  # a 32-bit float
    gguf_writer.add_custom_alignment(64)

    # Three constant-valued float32 tensors of increasing size.
    for idx, (count, fill) in enumerate([(32, 100.0), (64, 101.0), (96, 102.0)], 1):
        gguf_writer.add_tensor(f"tensor{idx}", np.full((count,), fill, dtype=np.float32))

    # Header, KV data, and tensor data must be written in this order.
    gguf_writer.write_header_to_file()
    gguf_writer.write_kv_data_to_file()
    gguf_writer.write_tensors_to_file()

    gguf_writer.close()


writer_example()
KerfuffleV2 marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 5 additions & 1 deletion gguf-py/gguf/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
from .gguf import *
from .constants import *
from .gguf_reader import *
from .gguf_writer import *
from .tensor_mapping import *
from .vocab import *
Loading