Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gguf-py: Refactor and allow reading/modifying existing GGUF files #3981

Merged
merged 33 commits into from
Nov 11, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
b8c80df
gguf-py: Refactor and add file reading support
KerfuffleV2 Nov 7, 2023
8047aa1
Replay changes from #3871
KerfuffleV2 Nov 7, 2023
d7688dc
Various type annotation fixes.
KerfuffleV2 Nov 8, 2023
a6f5742
sort imports with isort (again)
cebtenzzre Nov 8, 2023
ce865b3
Fix missing return statement in add_tensor
KerfuffleV2 Nov 8, 2023
f364636
style cleanup with flake8
cebtenzzre Nov 8, 2023
f2292fc
fix NamedTuple and Enum usage
cebtenzzre Nov 8, 2023
fffdac3
Fix an issue with state init in GGUFReader
KerfuffleV2 Nov 8, 2023
b56ed66
Damagage is not a word.
KerfuffleV2 Nov 8, 2023
4a5cd69
Clean up gguf-py/examples/modify_gguf.py whitespace
KerfuffleV2 Nov 9, 2023
2af29ff
Update gguf-py/examples/modify_gguf.py formatting
KerfuffleV2 Nov 9, 2023
855486c
Update gguf-py/gguf/gguf_reader.py type hint
KerfuffleV2 Nov 9, 2023
2360aaa
Make examples executable, formatting changes
KerfuffleV2 Nov 9, 2023
8e250fe
Add more information to GGUFReader and examples comments
KerfuffleV2 Nov 9, 2023
0d0306e
Include a gguf Python package version bump
KerfuffleV2 Nov 9, 2023
cc58ad0
Merge branch 'master' into feat-gguf-py-read-refactor
KerfuffleV2 Nov 9, 2023
bca0962
Add convert-gguf-endian.py script
KerfuffleV2 Nov 9, 2023
233cb07
cleanup
cebtenzzre Nov 9, 2023
5738b2f
gguf-py : bump minor version
cebtenzzre Nov 9, 2023
52bdc7e
Reorganize scripts
KerfuffleV2 Nov 9, 2023
a04f048
Make GGUFReader endian detection less arbitrary
KerfuffleV2 Nov 9, 2023
bd241db
Add JSON dumping support to gguf-dump.py
KerfuffleV2 Nov 9, 2023
382f975
A few for gguf-dump.py cleanups
KerfuffleV2 Nov 10, 2023
7d3580d
Murder accidental tuple in gguf-py/scripts/gguf-dump.py
KerfuffleV2 Nov 10, 2023
5608cd8
cleanup
cebtenzzre Nov 10, 2023
795dc0f
constants : remove unneeded type annotations
cebtenzzre Nov 10, 2023
a21e9e7
fix python 3.8 compat
cebtenzzre Nov 10, 2023
eff662d
Set up gguf- scripts in pyproject.toml
KerfuffleV2 Nov 10, 2023
0b0e726
And include scripts/__init__.py, derp
KerfuffleV2 Nov 10, 2023
960f912
convert.py: We can't currently support Q8_0 on big endian.
KerfuffleV2 Nov 10, 2023
9ce51b6
gguf-py: SpecialVocab: Always try available sources for special token…
KerfuffleV2 Nov 10, 2023
f22b2f2
cleanup
cebtenzzre Nov 10, 2023
4814b4b
Promote add_X_token to GGUF metadata for BOS and EOS
KerfuffleV2 Nov 10, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion convert-baichuan-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from sentencepiece import SentencePieceProcessor # type: ignore[import]

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


Expand Down
2 changes: 1 addition & 1 deletion convert-bloom-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from transformers import AutoTokenizer # type: ignore[import]

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


Expand Down
2 changes: 1 addition & 1 deletion convert-falcon-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from transformers import AutoTokenizer # type: ignore[import]

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


Expand Down
2 changes: 1 addition & 1 deletion convert-gptneox-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from transformers import AutoTokenizer # type: ignore[import]

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


Expand Down
24 changes: 2 additions & 22 deletions convert-llama-ggml-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,9 @@

import os
if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf

# Note: Does not support GGML_QKK_64
QK_K = 256
# Items here are (block size, type size)
GGML_QUANT_SIZES = {
gguf.GGMLQuantizationType.F32 : (1, 4),
gguf.GGMLQuantizationType.F16 : (1, 2),
gguf.GGMLQuantizationType.Q4_0 : (32, 2 + 16),
gguf.GGMLQuantizationType.Q4_1 : (32, 2 + 2 + 16),
gguf.GGMLQuantizationType.Q5_0 : (32, 2 + 4 + 16),
gguf.GGMLQuantizationType.Q5_1 : (32, 2 + 2 + 4 + 16),
gguf.GGMLQuantizationType.Q8_0 : (32, 2 + 32),
gguf.GGMLQuantizationType.Q8_1 : (32, 4 + 4 + 32),
gguf.GGMLQuantizationType.Q2_K : (256, 2 + 2 + QK_K // 16 + QK_K // 4),
gguf.GGMLQuantizationType.Q3_K : (256, 2 + QK_K // 4 + QK_K // 8 + 12),
gguf.GGMLQuantizationType.Q4_K : (256, 2 + 2 + QK_K // 2 + 12),
gguf.GGMLQuantizationType.Q5_K : (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
gguf.GGMLQuantizationType.Q6_K : (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
gguf.GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8),
}

class GGMLFormat(IntEnum):
GGML = 0
GGMF = 1
Expand Down Expand Up @@ -125,7 +105,7 @@ def load(self, data, offset):
(n_dims, name_len, dtype) = struct.unpack('<3I', data[offset:offset + 12])
assert n_dims >= 0 and n_dims <= 4, f'Invalid tensor dimensions {n_dims}'
assert name_len < 4096, 'Absurd tensor name length'
quant = GGML_QUANT_SIZES.get(dtype)
quant = gguf.GGML_QUANT_SIZES.get(dtype)
assert quant is not None, 'Unknown tensor type'
(blksize, tysize) = quant
offset += 12
Expand Down
2 changes: 1 addition & 1 deletion convert-mpt-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from transformers import AutoTokenizer # type: ignore[import]

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


Expand Down
2 changes: 1 addition & 1 deletion convert-persimmon-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
from sentencepiece import SentencePieceProcessor
if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf

def _flatten_dict(dct, tensors, prefix=None):
Expand Down
2 changes: 1 addition & 1 deletion convert-starcoder-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from transformers import AutoTokenizer # type: ignore[import]

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


Expand Down
10 changes: 4 additions & 6 deletions convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@

import argparse
import concurrent.futures
import copy
import enum
import faulthandler
import functools
import io
import itertools
import json
import math
Expand All @@ -23,14 +21,14 @@
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path
from typing import IO, TYPE_CHECKING, Any, Callable, Generator, Iterable, Literal, Sequence, TypeVar
from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, TypeVar

import numpy as np
from sentencepiece import SentencePieceProcessor # type: ignore[import]

import os
if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf

if TYPE_CHECKING:
Expand Down Expand Up @@ -851,7 +849,7 @@ def add_meta_vocab(self, vocab: Vocab) -> None:
elif isinstance(vocab, BpeVocab):
self.gguf.add_tokenizer_model("gpt2")
else:
raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab')
raise ValueError('Unknown vocab type: Not BpeVocab or SentencePieceVocab')
self.gguf.add_token_list(tokens)
self.gguf.add_token_scores(scores)
self.gguf.add_token_types(toktypes)
Expand Down Expand Up @@ -905,7 +903,7 @@ def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
return dt.quantize(arr)

@staticmethod
def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None:
def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
check_vocab_size(params, vocab)

of = OutputFile(fname_out, endianess=endianess)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pathlib import Path

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / '..' / '..' / 'gguf-py' / 'gguf'))
sys.path.insert(1, str(Path(__file__).parent / '..' / '..' / 'gguf-py'))
import gguf

# gguf constants
Expand Down
8 changes: 8 additions & 0 deletions gguf-py/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ as an example for its usage.
pip install gguf
```

## API Examples

[examples/writer.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/writer.py) — Generates `example.gguf` in the current directory to demonstrate generating a GGUF file. Note that this file cannot be used as a model.

[examples/dump_gguf.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/dump_gguf.py) — Dumps a GGUF file's metadata to the console.

[examples/modify_gguf.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/modify_gguf.py) — Allows changing simple metadata values in a GGUF file by key.

## Development
Maintainers who participate in development of this package are advised to install it in editable mode:

Expand Down
41 changes: 41 additions & 0 deletions gguf-py/examples/dump_gguf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python3
import sys
from pathlib import Path

# Necessary to load the local gguf package
sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFReader, GGUFValueType # noqa: E402

def dump_gguf(filename: str) -> None:
    """Dump a GGUF file's key/value metadata and tensor overview to stdout.

    Opens *filename* read-only with GGUFReader and prints one line per
    metadata field followed by one line per tensor.
    """
    # Bug fix: the original printed the literal placeholder text instead of
    # the actual file name (f-string had no {filename} substitution).
    print(f'* Loading: {filename}')
    reader = GGUFReader(filename, 'r')

    print(f'\n* Dumping {len(reader.fields)} key/value pair(s)')
    for n, field in enumerate(reader.fields.values(), 1):
        if not field.types:
            pretty_type = 'N/A'
        elif field.types[0] == GGUFValueType.ARRAY:
            # Arrays may be nested: render e.g. [[UINT32]] for array-of-array,
            # with the innermost element type in the middle.
            nest_count = len(field.types) - 1
            pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
        else:
            pretty_type = str(field.types[-1].name)
        print(f'  {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}', end = '')
        if len(field.types) == 1:
            curr_type = field.types[0]
            if curr_type == GGUFValueType.STRING:
                # Show at most 60 characters of string values.
                print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '')
            elif field.types[0] in reader.gguf_scalar_to_np:
                print(' = {0}'.format(field.parts[-1][0]), end = '')
        print()

    print(f'\n* Dumping {len(reader.tensors)} tensor(s)')
    for n, tensor in enumerate(reader.tensors, 1):
        # Pad the shape out to 4 dimensions so the columns line up.
        prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
        print(f'  {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}')


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('dump_gguf: Error: Specify an input file', file = sys.stderr)
        sys.exit(1)
    dump_gguf(sys.argv[1])
41 changes: 41 additions & 0 deletions gguf-py/examples/modify_gguf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python3
import sys
from pathlib import Path

# Necessary to load the local gguf package
sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFReader # noqa: E402
KerfuffleV2 marked this conversation as resolved.
Show resolved Hide resolved

def change_gguf(reader: GGUFReader, key: str, value: str) -> None:
    """Interactively change one scalar metadata field in an open GGUF file.

    Exits the process rather than raising: status 1 when the key is missing
    or not a supported scalar type, status 0 when the value already matches
    or the user declines the confirmation prompt.
    """
    target = reader.get_field(key)
    if target is None:
        print(f'! Field {repr(key)} not found', file = sys.stderr)
        sys.exit(1)

    # Only plain scalar types (those with a numpy mapping) can be rewritten
    # in place; arrays and strings are not supported.
    converter = None
    if target.types:
        converter = reader.gguf_scalar_to_np.get(target.types[0])
    if converter is None:
        print(f'! Field {repr(key)} has unsupported type: {target.types}')
        sys.exit(1)

    value_index = target.data[0]
    current_value = target.parts[value_index][0]
    new_value = converter(value)
    print(f'* Preparing to change field {repr(key)} from {current_value} to {new_value}')
    if current_value == new_value:
        print(f'- Key {repr(key)} already set to requested value {current_value}')
        sys.exit(0)

    # Mutating a GGUF file in place is risky — require explicit consent.
    print('*** Warning *** Warning *** Warning **')
    print('* Changing fields in a GGUF file can damage it. If you are positive then type YES:')
    if input('YES, I am sure> ') != 'YES':
        print("You didn't enter YES. Okay then, see ya!")
        sys.exit(0)

    target.parts[value_index][0] = new_value
    print('* Field changed. Successful completion.')


if __name__ == '__main__':
    if len(sys.argv) < 4:
        print('modify_gguf: Error: Missing arguments. Syntax: modify_gguf.py <filename> <key> <value>', file = sys.stderr)
        sys.exit(1)
    print(f'* Loading: {sys.argv[1]}')
    reader = GGUFReader(sys.argv[1], 'r+')
    change_gguf(reader, sys.argv[2], sys.argv[3])
37 changes: 37 additions & 0 deletions gguf-py/examples/writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env python3
import sys
from pathlib import Path

import numpy as np

# Necessary to load the local gguf package
sys.path.insert(0, str(Path(__file__).parent.parent))

from gguf import GGUFWriter # noqa: E402

# Example usage:
def writer_example() -> None:
    """Write a small demonstration GGUF file (example.gguf) to the current directory.

    The output is structurally valid GGUF but is not a usable model.
    """
    gguf_writer = GGUFWriter("example.gguf", "llama")

    # Metadata: architecture, block count, two custom KV entries, alignment.
    gguf_writer.add_architecture()
    gguf_writer.add_block_count(12)
    gguf_writer.add_uint32("answer", 42)              # a 32-bit integer
    gguf_writer.add_float32("answer_in_float", 42.0)  # a 32-bit float
    gguf_writer.add_custom_alignment(64)

    # Three constant-valued float32 tensors of increasing size.
    for idx, (count, fill) in enumerate([(32, 100.0), (64, 101.0), (96, 102.0)], 1):
        gguf_writer.add_tensor(f"tensor{idx}", np.full((count,), fill, dtype=np.float32))

    # Header, KV data, and tensor data must be written in this order.
    gguf_writer.write_header_to_file()
    gguf_writer.write_kv_data_to_file()
    gguf_writer.write_tensors_to_file()

    gguf_writer.close()


writer_example()
KerfuffleV2 marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 5 additions & 1 deletion gguf-py/gguf/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
from .gguf import *
from .constants import *
from .gguf_reader import *
from .gguf_writer import *
from .tensor_mapping import *
from .vocab import *
Loading