Manually fix existing lint errors #226

Merged · 1 commit · Apr 16, 2024
6 changes: 1 addition & 5 deletions build/gguf_loader.py
@@ -5,14 +5,12 @@
# LICENSE file in the root directory of this source tree.


import argparse

import copy
import logging
import sys
from dataclasses import dataclass
from pathlib import Path
-from typing import Any, Dict, Mapping
+from typing import Any, Dict

import gguf

@@ -31,8 +29,6 @@
wd = Path(__file__).parent.resolve()
sys.path.append(str(wd))

-from typing import Set
-
from model import ModelArgs, Transformer

logger: logging.Logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion build/gguf_util.py
@@ -63,7 +63,7 @@ def test_by_to_float(source_file: str, target_file: str) -> None:
)
print("First 5 elements of converted source: ", source.reshape(-1)[0:5])
print("First 5 elements of target: ", target.reshape(-1)[0:5])
-        assert False, "found mismatch"
+        raise AssertionError("found mismatch")

print("All tensors match.")

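The gguf_util.py change swaps `assert False` for an explicit `raise`. The usual lint rationale (flake8-bugbear's B011) is that `assert` statements are stripped entirely when Python runs with optimizations enabled, so the failure path would silently vanish. A minimal sketch, separate from this diff (`check_tensors_*` are hypothetical stand-ins for `test_by_to_float`):

```python
def check_tensors_with_assert(match: bool) -> None:
    assert match, "found mismatch"  # compiled away under `python -O`


def check_tensors_with_raise(match: bool) -> None:
    if not match:
        raise AssertionError("found mismatch")  # raised regardless of -O
```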
98 changes: 54 additions & 44 deletions build/model.py
@@ -95,48 +95,58 @@ def from_name(cls, name: str):


transformer_configs = {
"CodeLlama-7b-Python-hf": dict(
block_size=16384, vocab_size=32000, n_layers=32, dim=4096, rope_base=1000000
),
"7B": dict(n_layers=32, n_heads=32, dim=4096),
"13B": dict(n_layers=40, n_heads=40, dim=5120),
"30B": dict(n_layers=60, n_heads=52, dim=6656),
"34B": dict(
n_layers=48,
n_heads=64,
dim=8192,
vocab_size=32000,
n_local_heads=8,
hidden_dim=22016,
rope_base=1000000,
), # CodeLlama-34B-Python-hf
"70B": dict(n_layers=80, n_heads=64, dim=8192, n_local_heads=8, hidden_dim=28672),
"Mistral-7B": dict(
n_layers=32,
n_heads=32,
n_local_heads=8,
dim=4096,
hidden_dim=14336,
vocab_size=32000,
),
"Mistral-7B-Instruct-v0.1": dict(
n_layers=32,
n_heads=32,
n_local_heads=8,
dim=4096,
hidden_dim=14336,
vocab_size=32000,
),
"Mistral-7B-Instruct-v0.2": dict(
n_layers=32,
n_heads=32,
n_local_heads=8,
dim=4096,
hidden_dim=14336,
vocab_size=32000,
),
"stories15M": dict(n_layers=6, n_heads=6, dim=288),
"stories110M": dict(n_layers=12, n_heads=12, dim=768),
"CodeLlama-7b-Python-hf": {
"block_size": 16384,
"vocab_size": 32000,
"n_layers": 32,
"dim": 4096,
"rope_base": 1000000,
},
"7B": {"n_layers": 32, "n_heads": 32, "dim": 4096},
"13B": {"n_layers": 40, "n_heads": 40, "dim": 5120},
"30B": {"n_layers": 60, "n_heads": 52, "dim": 6656},
"34B": {
"n_layers": 48,
"n_heads": 64,
"dim": 8192,
"vocab_size": 32000,
"n_local_heads": 8,
"hidden_dim": 22016,
"rope_base": 1000000,
}, # CodeLlama-34B-Python-hf
"70B": {
"n_layers": 80,
"n_heads": 64,
"dim": 8192,
"n_local_heads": 8,
"hidden_dim": 28672,
},
"Mistral-7B": {
"n_layers": 32,
"n_heads": 32,
"n_local_heads": 8,
"dim": 4096,
"hidden_dim": 14336,
"vocab_size": 32000,
},
"Mistral-7B-Instruct-v0.1": {
"n_layers": 32,
"n_heads": 32,
"n_local_heads": 8,
"dim": 4096,
"hidden_dim": 14336,
"vocab_size": 32000,
},
"Mistral-7B-Instruct-v0.2": {
"n_layers": 32,
"n_heads": 32,
"n_local_heads": 8,
"dim": 4096,
"hidden_dim": 14336,
"vocab_size": 32000,
},
"stories15M": {"n_layers": 6, "n_heads": 6, "dim": 288},
"stories110M": {"n_layers": 12, "n_heads": 12, "dim": 768},
}


@@ -216,7 +226,7 @@ def forward(self, idx: Tensor, input_pos: Optional[Tensor] = None) -> Tensor:
freqs_cis = self.freqs_cis[input_pos]
x = self.tok_embeddings(idx)

-        for i, layer in enumerate(self.layers):
+        for _, layer in enumerate(self.layers):
x = layer(x, input_pos, freqs_cis, mask)
x = self.norm(x)
logits = self.output(x)
@@ -344,7 +354,7 @@ def forward(
q = apply_rotary_emb(q, freqs_cis)
k = apply_rotary_emb(k, freqs_cis)

-        q, k, v = map(lambda x: x.transpose(1, 2), (q, k, v))
+        q, k, v = (x.transpose(1, 2) for x in (q, k, v))

if self.kv_cache is not None:
k, v = self.kv_cache.update(input_pos, k, v)
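The model.py edits are three mechanical style fixes: dict literals instead of `dict(...)` calls (flake8-comprehensions C408), `_` for a loop index the body never reads, and a generator expression instead of `map(lambda ...)` (C417). A small sketch, not part of the diff, showing that both rewrites are behavior-preserving; the literal skips a name lookup plus a call, and the genexpr avoids one Python-level lambda call per element:

```python
# C408: the two constructions build the same mapping.
via_call = dict(n_layers=32, n_heads=32, dim=4096)
via_literal = {"n_layers": 32, "n_heads": 32, "dim": 4096}
assert via_call == via_literal

# C417: map(lambda ...) and a generator expression are interchangeable here.
q, k, v = ([1, 2], [3, 4], [5, 6])
via_map = tuple(map(lambda x: x[::-1], (q, k, v)))
via_genexpr = tuple(x[::-1] for x in (q, k, v))
assert via_map == via_genexpr
```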
6 changes: 0 additions & 6 deletions build/model_aoti.py
@@ -1,12 +1,6 @@
-from ctypes import c_void_p

import torch
import torch.nn as nn
-from torch import empty
-from torch._dynamo.testing import rand_strided
-from torch._inductor.codecache import AsyncCompile
-from torch._inductor.utils import print_performance
-from torch._inductor.wrapper_benchmark import compiled_module_main

# with open("./dso_model.h", "rb") as f:
# dso_src = f.read().decode("utf-8")
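The deletions in model_aoti.py, and in model_et.py, export_aoti.py, and export_et.py below, are all the same fix: dropping imports the module never uses (pyflakes/flake8 F401). As a rough, assumption-laden sketch of how such a check works — real linters handle aliases, `__all__`, re-exports, and more — unused imports fall out of comparing imported names against names actually referenced:

```python
import ast

source = """
import torch
from ctypes import c_void_p

print(torch.__version__)
"""

tree = ast.parse(source)
imported, used = set(), set()
for node in ast.walk(tree):
    if isinstance(node, ast.Import):
        imported.update(a.asname or a.name.split(".")[0] for a in node.names)
    elif isinstance(node, ast.ImportFrom):
        imported.update(a.asname or a.name for a in node.names)
    elif isinstance(node, ast.Name):
        used.add(node.id)

print(sorted(imported - used))  # ['c_void_p'] -- imported but never used
```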
3 changes: 0 additions & 3 deletions build/model_et.py
@@ -1,9 +1,6 @@
-from ctypes import c_void_p
-
import torch
import torch.nn as nn
from executorch.extension.pybindings import portable_lib as exec_lib
-from torch import empty


class PTEModel(nn.Module):
7 changes: 5 additions & 2 deletions eval.py
@@ -165,7 +165,7 @@ def _model_generate(self, context, max_length, eos_token_id):
def eval(
model: Transformer,
tokenizer,
-    tasks: list = ["hellaswag"],
+    tasks: Optional[list] = None,
limit: Optional[int] = None,
max_seq_length: Optional[int] = None,
) -> dict:
@@ -182,6 +182,9 @@ def eval(
Returns:
eval_results (dict): A dictionary of evaluation results for the specified task(s).
"""
+    if tasks is None:
+        tasks = ["hellaswag"]

model_eval_wrapper = GPTFastEvalWrapper(
model,
tokenizer,
@@ -195,7 +198,7 @@

if "hendrycks_test" in tasks:
tasks.remove("hendrycks_test")
-        tasks += [x for x in lm_eval.tasks.hendrycks_test.create_all_tasks().keys()]
+        tasks += list(lm_eval.tasks.hendrycks_test.create_all_tasks().keys())
task_dict = get_task_dict(tasks)

eval_results = evaluate(
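The eval.py change replaces a mutable default argument with `None` plus an in-body default (flake8-bugbear B006). It matters here because `eval()` later runs `tasks.remove(...)` and `tasks += ...`, both of which mutate the list in place — with `tasks: list = ["hellaswag"]`, that would mutate the single default object shared by every call. A self-contained sketch of the failure mode, not taken from the repo:

```python
from typing import Optional


def eval_buggy(tasks: list = ["hellaswag"]) -> list:
    tasks += ["mmlu"]  # += mutates the one default list created at def time
    return tasks


print(eval_buggy())  # ['hellaswag', 'mmlu']
print(eval_buggy())  # ['hellaswag', 'mmlu', 'mmlu'] -- state leaks across calls


def eval_fixed(tasks: Optional[list] = None) -> list:
    if tasks is None:
        tasks = ["hellaswag"]  # a fresh list on every call, as in the patch
    tasks += ["mmlu"]
    return tasks


print(eval_fixed())  # ['hellaswag', 'mmlu']
print(eval_fixed())  # ['hellaswag', 'mmlu'] -- no leakage
```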
11 changes: 1 addition & 10 deletions export_aoti.py
@@ -4,20 +4,11 @@
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

-import itertools
import sys
-import time
from pathlib import Path
from typing import Optional, Tuple

import torch
-import torch.nn as nn

from build.model import Transformer

-from generate import decode_one_token
-from quantize import quantize_model
-from torch.export import Dim, export
+from torch.export import Dim

default_device = "cpu" # 'cuda' if torch.cuda.is_available() else 'cpu'

7 changes: 1 addition & 6 deletions export_et.py
@@ -4,11 +4,8 @@
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

-import time
from pathlib import Path

import torch
-import torch.nn as nn
from build.model import Transformer

from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
@@ -28,10 +25,8 @@
# )
from executorch_portable_utils import export_to_edge

-from generate import decode_one_token
-from quantize import get_precision, name_to_dtype, quantize_model, set_precision
+from quantize import get_precision
from torch._export import capture_pre_autograd_graph
-from torch.export import Dim, export


default_device = "cpu" # 'cuda' if torch.cuda.is_available() else 'cpu'
6 changes: 3 additions & 3 deletions scripts/convert_hf_checkpoint.py
@@ -22,11 +22,11 @@
@torch.inference_mode()
def convert_hf_checkpoint(
*,
-    checkpoint_dir: Path = Path(
-        "checkpoints/meta-Transformer/Transformer-2-7b-chat-hf"
-    ),
+    checkpoint_dir: Optional[Path] = None,
    model_name: Optional[str] = None,
) -> None:
+    if checkpoint_dir is None:
+        checkpoint_dir = Path("checkpoints/meta-Transformer/Transformer-2-7b-chat-hf")
    if model_name is None:
        model_name = checkpoint_dir.name

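The convert_hf_checkpoint.py change follows the same `None`-default pattern. `Path` objects are immutable, so the original default was not a correctness bug like the eval.py one; the issue (flake8-bugbear B008) is that a function call in a default runs exactly once, when `def` executes, which surprises callers whenever the call's result can vary. A tiny sketch, unrelated to the repo's code:

```python
import time


def snapshot(ts: float = time.time()) -> float:
    # time.time() ran once, at definition time -- not on each call
    return ts


first = snapshot()
time.sleep(0.05)
second = snapshot()
assert first == second  # both calls got the same frozen timestamp
```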