diff --git a/build/gguf_loader.py b/build/gguf_loader.py
index 464f4536a..033cec212 100644
--- a/build/gguf_loader.py
+++ b/build/gguf_loader.py
@@ -5,14 +5,12 @@
 # LICENSE file in the root directory of this source tree.
 
-import argparse
-
 import copy
 import logging
 import sys
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Dict, Mapping
+from typing import Any, Dict
 
 import gguf
 
@@ -31,8 +29,6 @@
 wd = Path(__file__).parent.resolve()
 sys.path.append(str(wd))
 
-from typing import Set
-
 from model import ModelArgs, Transformer
 
 logger: logging.Logger = logging.getLogger(__name__)
diff --git a/build/gguf_util.py b/build/gguf_util.py
index 9f8a07661..fe631781e 100644
--- a/build/gguf_util.py
+++ b/build/gguf_util.py
@@ -63,7 +63,7 @@ def test_by_to_float(source_file: str, target_file: str) -> None:
         )
         print("First 5 elements of converted source: ", source.reshape(-1)[0:5])
         print("First 5 elements of target: ", target.reshape(-1)[0:5])
-        assert False, "found mismatch"
+        raise AssertionError("found mismatch")
 
     print("All tensors match.")
diff --git a/build/model.py b/build/model.py
index 655405d9f..4786434f8 100644
--- a/build/model.py
+++ b/build/model.py
@@ -95,48 +95,58 @@ def from_name(cls, name: str):
 
 
 transformer_configs = {
-    "CodeLlama-7b-Python-hf": dict(
-        block_size=16384, vocab_size=32000, n_layers=32, dim=4096, rope_base=1000000
-    ),
-    "7B": dict(n_layers=32, n_heads=32, dim=4096),
-    "13B": dict(n_layers=40, n_heads=40, dim=5120),
-    "30B": dict(n_layers=60, n_heads=52, dim=6656),
-    "34B": dict(
-        n_layers=48,
-        n_heads=64,
-        dim=8192,
-        vocab_size=32000,
-        n_local_heads=8,
-        hidden_dim=22016,
-        rope_base=1000000,
-    ),  # CodeLlama-34B-Python-hf
-    "70B": dict(n_layers=80, n_heads=64, dim=8192, n_local_heads=8, hidden_dim=28672),
-    "Mistral-7B": dict(
-        n_layers=32,
-        n_heads=32,
-        n_local_heads=8,
-        dim=4096,
-        hidden_dim=14336,
-        vocab_size=32000,
-    ),
-    "Mistral-7B-Instruct-v0.1": dict(
-        n_layers=32,
-        n_heads=32,
-        n_local_heads=8,
-        dim=4096,
-        hidden_dim=14336,
-        vocab_size=32000,
-    ),
-    "Mistral-7B-Instruct-v0.2": dict(
-        n_layers=32,
-        n_heads=32,
-        n_local_heads=8,
-        dim=4096,
-        hidden_dim=14336,
-        vocab_size=32000,
-    ),
-    "stories15M": dict(n_layers=6, n_heads=6, dim=288),
-    "stories110M": dict(n_layers=12, n_heads=12, dim=768),
+    "CodeLlama-7b-Python-hf": {
+        "block_size": 16384,
+        "vocab_size": 32000,
+        "n_layers": 32,
+        "dim": 4096,
+        "rope_base": 1000000,
+    },
+    "7B": {"n_layers": 32, "n_heads": 32, "dim": 4096},
+    "13B": {"n_layers": 40, "n_heads": 40, "dim": 5120},
+    "30B": {"n_layers": 60, "n_heads": 52, "dim": 6656},
+    "34B": {
+        "n_layers": 48,
+        "n_heads": 64,
+        "dim": 8192,
+        "vocab_size": 32000,
+        "n_local_heads": 8,
+        "hidden_dim": 22016,
+        "rope_base": 1000000,
+    },  # CodeLlama-34B-Python-hf
+    "70B": {
+        "n_layers": 80,
+        "n_heads": 64,
+        "dim": 8192,
+        "n_local_heads": 8,
+        "hidden_dim": 28672,
+    },
+    "Mistral-7B": {
+        "n_layers": 32,
+        "n_heads": 32,
+        "n_local_heads": 8,
+        "dim": 4096,
+        "hidden_dim": 14336,
+        "vocab_size": 32000,
+    },
+    "Mistral-7B-Instruct-v0.1": {
+        "n_layers": 32,
+        "n_heads": 32,
+        "n_local_heads": 8,
+        "dim": 4096,
+        "hidden_dim": 14336,
+        "vocab_size": 32000,
+    },
+    "Mistral-7B-Instruct-v0.2": {
+        "n_layers": 32,
+        "n_heads": 32,
+        "n_local_heads": 8,
+        "dim": 4096,
+        "hidden_dim": 14336,
+        "vocab_size": 32000,
+    },
+    "stories15M": {"n_layers": 6, "n_heads": 6, "dim": 288},
+    "stories110M": {"n_layers": 12, "n_heads": 12, "dim": 768},
 }
@@ -216,7 +226,7 @@ def forward(self, idx: Tensor, input_pos: Optional[Tensor] = None) -> Tensor:
         freqs_cis = self.freqs_cis[input_pos]
         x = self.tok_embeddings(idx)
 
-        for i, layer in enumerate(self.layers):
+        for _, layer in enumerate(self.layers):
             x = layer(x, input_pos, freqs_cis, mask)
         x = self.norm(x)
         logits = self.output(x)
@@ -344,7 +354,7 @@ def forward(
         q = apply_rotary_emb(q, freqs_cis)
         k = apply_rotary_emb(k, freqs_cis)
 
-        q, k, v = map(lambda x: x.transpose(1, 2), (q, k, v))
+        q, k, v = (x.transpose(1, 2) for x in (q, k, v))
 
         if self.kv_cache is not None:
             k, v = self.kv_cache.update(input_pos, k, v)
diff --git a/build/model_aoti.py b/build/model_aoti.py
index 50fa2b939..b7c1bda56 100644
--- a/build/model_aoti.py
+++ b/build/model_aoti.py
@@ -1,12 +1,6 @@
-from ctypes import c_void_p
-
 import torch
 import torch.nn as nn
-from torch import empty
-from torch._dynamo.testing import rand_strided
 from torch._inductor.codecache import AsyncCompile
-from torch._inductor.utils import print_performance
-from torch._inductor.wrapper_benchmark import compiled_module_main
 
 # with open("./dso_model.h", "rb") as f:
 #     dso_src = f.read().decode("utf-8")
diff --git a/build/model_et.py b/build/model_et.py
index f7bd02194..747a7ca31 100644
--- a/build/model_et.py
+++ b/build/model_et.py
@@ -1,9 +1,6 @@
-from ctypes import c_void_p
-
 import torch
 import torch.nn as nn
 from executorch.extension.pybindings import portable_lib as exec_lib
-from torch import empty
 
 
 class PTEModel(nn.Module):
diff --git a/eval.py b/eval.py
index 6d719d460..f681d8418 100644
--- a/eval.py
+++ b/eval.py
@@ -165,7 +165,7 @@ def _model_generate(self, context, max_length, eos_token_id):
 def eval(
     model: Transformer,
     tokenizer,
-    tasks: list = ["hellaswag"],
+    tasks: Optional[list] = None,
     limit: Optional[int] = None,
     max_seq_length: Optional[int] = None,
 ) -> dict:
@@ -182,6 +182,9 @@ def eval(
     Returns:
         eval_results (dict): A dictionary of evaluation results for the specified task(s).
     """
+    if tasks is None:
+        tasks = ["hellaswag"]
+
     model_eval_wrapper = GPTFastEvalWrapper(
         model,
         tokenizer,
@@ -195,7 +198,7 @@ def eval(
 
     if "hendrycks_test" in tasks:
         tasks.remove("hendrycks_test")
-        tasks += [x for x in lm_eval.tasks.hendrycks_test.create_all_tasks().keys()]
+        tasks += list(lm_eval.tasks.hendrycks_test.create_all_tasks().keys())
     task_dict = get_task_dict(tasks)
 
     eval_results = evaluate(
diff --git a/export_aoti.py b/export_aoti.py
index b9a59c3bb..2fa58369d 100644
--- a/export_aoti.py
+++ b/export_aoti.py
@@ -4,20 +4,11 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 
-import itertools
-import sys
-import time
-from pathlib import Path
-from typing import Optional, Tuple
 
 import torch
 import torch.nn as nn
-from build.model import Transformer
-
-from generate import decode_one_token
-from quantize import quantize_model
-from torch.export import Dim, export
+from torch.export import Dim
 
 default_device = "cpu"  # 'cuda' if torch.cuda.is_available() else 'cpu'
diff --git a/export_et.py b/export_et.py
index eecba7ec7..16b3ad617 100644
--- a/export_et.py
+++ b/export_et.py
@@ -4,11 +4,8 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 
-import time
-from pathlib import Path
 
 import torch
-import torch.nn as nn
 
 from build.model import Transformer
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
@@ -28,10 +25,8 @@
 # )
 from executorch_portable_utils import export_to_edge
 
-from generate import decode_one_token
-from quantize import get_precision, name_to_dtype, quantize_model, set_precision
+from quantize import get_precision
 from torch._export import capture_pre_autograd_graph
-from torch.export import Dim, export
 
 default_device = "cpu"  # 'cuda' if torch.cuda.is_available() else 'cpu'
diff --git a/scripts/convert_hf_checkpoint.py b/scripts/convert_hf_checkpoint.py
index 1d4300f6e..b5d6d7ba2 100644
--- a/scripts/convert_hf_checkpoint.py
+++ b/scripts/convert_hf_checkpoint.py
@@ -22,11 +22,11 @@
 
 @torch.inference_mode()
 def convert_hf_checkpoint(
     *,
-    checkpoint_dir: Path = Path(
-        "checkpoints/meta-Transformer/Transformer-2-7b-chat-hf"
-    ),
+    checkpoint_dir: Optional[Path] = None,
     model_name: Optional[str] = None,
 ) -> None:
+    if checkpoint_dir is None:
+        checkpoint_dir = Path("checkpoints/meta-Transformer/Transformer-2-7b-chat-hf")
     if model_name is None:
         model_name = checkpoint_dir.name