Merge branch 'gguf-no-tempfile'
cebtenzzre committed Oct 23, 2023
2 parents ab4e4ea + a535a08 commit d3b06e3
Showing 4 changed files with 55 additions and 51 deletions.
5 changes: 1 addition & 4 deletions convert-llama-ggml-to-gguf.py
@@ -235,10 +235,7 @@ def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override

def save(self):
print('* Preparing to save GGUF file')
gguf_writer = gguf.GGUFWriter(
self.cfg.output,
gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA],
use_temp_file = False )
gguf_writer = gguf.GGUFWriter(self.cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA])
self.add_params(gguf_writer)
self.add_vocab(gguf_writer)
if self.special_vocab is not None:
2 changes: 1 addition & 1 deletion examples/finetune/convert-finetune-checkpoint-to-gguf.py
@@ -475,7 +475,7 @@ def main():
# we should have read all available data
assert(offset == len(data))

gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA])
chk.save_gguf(gguf_writer)
print(" gguf: write header")
gguf_writer.write_header_to_file()
@@ -485,7 +485,7 @@ def main():
# we should have read all available data
assert(offset == len(data))

gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA])
chk.save_gguf(gguf_writer)
print(" gguf: write header")
gguf_writer.write_header_to_file()
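The converter scripts above all make the same one-line change: the use_temp_file argument is dropped, since the gguf.py diff below removes that parameter from GGUFWriter.__init__ entirely. A minimal sketch of what this means for any out-of-tree caller (cfg.output stands in for an output path, as in these scripts; not part of this commit):

```python
# Old call: the keyword no longer exists, so this now fails at construction time.
#   gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA],
#                                 use_temp_file = False)
#   -> TypeError: ... got an unexpected keyword argument 'use_temp_file'

# New call: the writer always buffers queued tensors in memory instead of a temp file.
gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA])
```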
97 changes: 52 additions & 45 deletions gguf-py/gguf/gguf.py
@@ -636,18 +636,16 @@ def get_type(val):
sys.exit()


class WriterState:
EMPTY = auto()
HEADER = auto()
KV_DATA = auto()
TI_DATA = auto()


class GGUFWriter:
fout: BufferedWriter
arch: str
offset_tensor = 0
data_alignment = GGUF_DEFAULT_ALIGNMENT
kv_data = b""
kv_data_count = 0
ti_data = b""
ti_data_count = 0
use_temp_file: bool
temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
tensors: list[tuple[np.ndarray[Any, Any], int]]
tensors: list[np.ndarray[Any, Any]]

@property
def pack_prefix(self):
@@ -656,9 +654,15 @@ def pack_prefix(self):
else:
return ">"

def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True, endianess=GGUFEndian.LITTLE):
def __init__(self, path: os.PathLike[str] | str, arch: str, endianess=GGUFEndian.LITTLE):
self.fout = open(path, "wb")
self.arch = arch
self.offset_tensor = 0
self.data_alignment = GGUF_DEFAULT_ALIGNMENT
self.kv_data = b""
self.kv_data_count = 0
self.ti_data = b""
self.ti_data_count = 0
self.endianess = endianess
self._simple_value_packing = {
GGUFValueType.UINT8: f"{self.pack_prefix}B",
@@ -673,27 +677,41 @@ def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True
GGUFValueType.FLOAT64: f"{self.pack_prefix}d",
GGUFValueType.BOOL: "?" ,
}
self.add_architecture()
self.use_temp_file = use_temp_file
self.tensors = []
self.state = WriterState.EMPTY

self.add_architecture()

endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian"
print(f"This gguf file is for {endianess_str} only")

def write_header_to_file(self):
if self.state is not WriterState.EMPTY:
raise ValueError(f'Expected output file to be empty, got {self.state}')

self.fout.write(struct.pack("<I", GGUF_MAGIC))
self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION))
self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count))
self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count))
self.flush()
# print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
#print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
self.state = WriterState.HEADER

def write_kv_data_to_file(self):
if self.state is not WriterState.HEADER:
raise ValueError(f'Expected output file to contain the header, got {self.state}')

self.fout.write(self.kv_data)
self.flush()
self.state = WriterState.KV_DATA

def write_ti_data_to_file(self):
if self.state is not WriterState.KV_DATA:
raise ValueError(f'Expected output file to contain KV data, got {self.state}')

self.fout.write(self.ti_data)
self.flush()
self.state = WriterState.TI_DATA

def add_key(self, key: str):
self.add_val(key, GGUFValueType.STRING, add_vtype=False)
@@ -807,33 +825,24 @@ def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype:
def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
if self.endianess == GGUFEndian.BIG:
tensor.byteswap(inplace=True)
if self.use_temp_file and self.temp_file is None:
fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
fp.seek(0)
self.temp_file = fp

shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)

pad = GGUFWriter.ggml_pad(tensor.nbytes, self.data_alignment) - tensor.nbytes

if self.temp_file is None:
self.tensors.append((tensor, pad))
return

tensor.tofile(self.temp_file)

if pad != 0:
self.temp_file.write(bytes([0] * pad))
self.tensors.append(tensor)

def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
if pad != 0:
fp.write(bytes([0] * pad))

def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
if self.state is not WriterState.TI_DATA:
raise ValueError(f'Expected output file to contain tensor info, got {self.state}')

if self.endianess==GGUFEndian.BIG:
tensor.byteswap(inplace=True)

self.write_padding(self.fout, self.fout.tell())
tensor.tofile(self.fout)
self.write_padding(self.fout, tensor.nbytes)
@@ -843,18 +852,13 @@ def write_tensors_to_file(self):

self.write_padding(self.fout, self.fout.tell())

if self.temp_file is None:
for (currtensor, currpad) in self.tensors:
currtensor.tofile(self.fout)
if currpad != 0:
self.fout.write(bytes([0] * currpad))
return

self.temp_file.seek(0)

shutil.copyfileobj(self.temp_file, self.fout)
self.flush()
self.temp_file.close()
while True:
try:
tensor = self.tensors.pop(0)
except IndexError:
break
tensor.tofile(self.fout)
self.write_padding(self.fout, tensor.nbytes)

def flush(self):
self.fout.flush()
@@ -983,11 +987,11 @@ def add_pad_token_id(self, id: int):


class SpecialVocab:
load_merges: bool = False
merges: list[str] = []
special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad')
special_token_ids: dict[str, int] = {}
n_vocab: int | None = None
load_merges: bool
merges: list[str]
special_token_types: tuple[str, ...]
special_token_ids: dict[str, int]
n_vocab: int | None

def __init__(
self, path: str | os.PathLike[str], load_merges: bool = False,
@@ -997,8 +1001,11 @@ def __init__(
self.special_token_ids = {}
self.n_vocab = n_vocab
self.load_merges = load_merges
self.merges = []
if special_token_types is not None:
self.special_token_types = special_token_types
else:
self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad')
self._load(Path(path))

def _load(self, path: Path) -> None:
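Taken together, the gguf.py changes drop the optional SpooledTemporaryFile: add_tensor now simply appends each array to an in-memory list, and the new WriterState checks require the file sections to be written in order. A minimal end-to-end sketch of the resulting flow, following the header -> KV -> tensors call order the converter scripts use (the output path and tensor below are placeholders, not part of this commit):

```python
import numpy as np
import gguf

# use_temp_file is gone: tensors queued via add_tensor() stay in writer.tensors
# until write_tensors_to_file() drains the list.
writer = gguf.GGUFWriter("out.gguf", gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA])

# Metadata and tensors must be queued before writing the header, because the
# header records the KV and tensor-info counts.
writer.add_tensor("output.weight", np.zeros((4, 4), dtype=np.float32))  # placeholder tensor

# The WriterState checks enforce this order; e.g. calling write_kv_data_to_file()
# before write_header_to_file() raises
# ValueError('Expected output file to contain the header, ...').
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()  # flushes the tensor info, then each queued tensor with padding

writer.fout.close()  # close the underlying file handle
```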
