Manually fix existing lint errors (#226)
Summary:

There are issues that the linter can't autopatch. Fixing them manually.

Test Plan:

- lintrunner -a --all-files
- CI
mergennachin authored and malfet committed Jul 17, 2024
1 parent a52d841 commit d817fd6
Showing 9 changed files with 66 additions and 80 deletions.
6 changes: 1 addition & 5 deletions build/gguf_loader.py
@@ -5,14 +5,12 @@
# LICENSE file in the root directory of this source tree.


-import argparse
-
import copy
import logging
import sys
from dataclasses import dataclass
from pathlib import Path
-from typing import Any, Dict, Mapping
+from typing import Any, Dict

import gguf

@@ -31,8 +29,6 @@
wd = Path(__file__).parent.resolve()
sys.path.append(str(wd))

-from typing import Set
-
from model import ModelArgs, Transformer

logger: logging.Logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion build/gguf_util.py
@@ -63,7 +63,7 @@ def test_by_to_float(source_file: str, target_file: str) -> None:
)
print("First 5 elements of converted source: ", source.reshape(-1)[0:5])
print("First 5 elements of target: ", target.reshape(-1)[0:5])
-assert False, "found mismatch"
+raise AssertionError("found mismatch")

print("All tensors match.")

98 changes: 54 additions & 44 deletions build/model.py
@@ -95,48 +95,58 @@ def from_name(cls, name: str):


transformer_configs = {
-    "CodeLlama-7b-Python-hf": dict(
-        block_size=16384, vocab_size=32000, n_layers=32, dim=4096, rope_base=1000000
-    ),
-    "7B": dict(n_layers=32, n_heads=32, dim=4096),
-    "13B": dict(n_layers=40, n_heads=40, dim=5120),
-    "30B": dict(n_layers=60, n_heads=52, dim=6656),
-    "34B": dict(
-        n_layers=48,
-        n_heads=64,
-        dim=8192,
-        vocab_size=32000,
-        n_local_heads=8,
-        hidden_dim=22016,
-        rope_base=1000000,
-    ), # CodeLlama-34B-Python-hf
-    "70B": dict(n_layers=80, n_heads=64, dim=8192, n_local_heads=8, hidden_dim=28672),
-    "Mistral-7B": dict(
-        n_layers=32,
-        n_heads=32,
-        n_local_heads=8,
-        dim=4096,
-        hidden_dim=14336,
-        vocab_size=32000,
-    ),
-    "Mistral-7B-Instruct-v0.1": dict(
-        n_layers=32,
-        n_heads=32,
-        n_local_heads=8,
-        dim=4096,
-        hidden_dim=14336,
-        vocab_size=32000,
-    ),
-    "Mistral-7B-Instruct-v0.2": dict(
-        n_layers=32,
-        n_heads=32,
-        n_local_heads=8,
-        dim=4096,
-        hidden_dim=14336,
-        vocab_size=32000,
-    ),
-    "stories15M": dict(n_layers=6, n_heads=6, dim=288),
-    "stories110M": dict(n_layers=12, n_heads=12, dim=768),
+    "CodeLlama-7b-Python-hf": {
+        "block_size": 16384,
+        "vocab_size": 32000,
+        "n_layers": 32,
+        "dim": 4096,
+        "rope_base": 1000000,
+    },
+    "7B": {"n_layers": 32, "n_heads": 32, "dim": 4096},
+    "13B": {"n_layers": 40, "n_heads": 40, "dim": 5120},
+    "30B": {"n_layers": 60, "n_heads": 52, "dim": 6656},
+    "34B": {
+        "n_layers": 48,
+        "n_heads": 64,
+        "dim": 8192,
+        "vocab_size": 32000,
+        "n_local_heads": 8,
+        "hidden_dim": 22016,
+        "rope_base": 1000000,
+    }, # CodeLlama-34B-Python-hf
+    "70B": {
+        "n_layers": 80,
+        "n_heads": 64,
+        "dim": 8192,
+        "n_local_heads": 8,
+        "hidden_dim": 28672,
+    },
+    "Mistral-7B": {
+        "n_layers": 32,
+        "n_heads": 32,
+        "n_local_heads": 8,
+        "dim": 4096,
+        "hidden_dim": 14336,
+        "vocab_size": 32000,
+    },
+    "Mistral-7B-Instruct-v0.1": {
+        "n_layers": 32,
+        "n_heads": 32,
+        "n_local_heads": 8,
+        "dim": 4096,
+        "hidden_dim": 14336,
+        "vocab_size": 32000,
+    },
+    "Mistral-7B-Instruct-v0.2": {
+        "n_layers": 32,
+        "n_heads": 32,
+        "n_local_heads": 8,
+        "dim": 4096,
+        "hidden_dim": 14336,
+        "vocab_size": 32000,
+    },
+    "stories15M": {"n_layers": 6, "n_heads": 6, "dim": 288},
+    "stories110M": {"n_layers": 12, "n_heads": 12, "dim": 768},
}
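The block above rewrites every dict(...) call in transformer_configs as a dict literal. This matches the common "unnecessary dict call" style rule (C408 in flake8-comprehensions terms; the exact rule applied here is an assumption), and a literal also skips the dict name lookup and call. The two spellings build the same object:

# Equivalent spellings; the literal avoids a global name lookup and a call.
config_call = dict(n_layers=32, n_heads=32, dim=4096)
config_literal = {"n_layers": 32, "n_heads": 32, "dim": 4096}
assert config_call == config_literal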


@@ -216,7 +226,7 @@ def forward(self, idx: Tensor, input_pos: Optional[Tensor] = None) -> Tensor:
freqs_cis = self.freqs_cis[input_pos]
x = self.tok_embeddings(idx)

-for i, layer in enumerate(self.layers):
+for _, layer in enumerate(self.layers):
x = layer(x, input_pos, freqs_cis, mask)
x = self.norm(x)
logits = self.output(x)
@@ -344,7 +354,7 @@ def forward(
q = apply_rotary_emb(q, freqs_cis)
k = apply_rotary_emb(k, freqs_cis)

-q, k, v = map(lambda x: x.transpose(1, 2), (q, k, v))
+q, k, v = (x.transpose(1, 2) for x in (q, k, v))

if self.kv_cache is not None:
k, v = self.kv_cache.update(input_pos, k, v)
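The two one-line edits above are also classic manual lint fixes: a loop index that is never read is renamed to _, and a map over a lambda becomes a generator expression. A rough, self-contained illustration (the tensor shapes are made up and unrelated to the real model dimensions):

import torch

q = torch.randn(2, 8, 4, 16)  # (batch, seq_len, n_heads, head_dim), illustrative only
k = torch.randn(2, 8, 4, 16)
v = torch.randn(2, 8, 4, 16)

# Before: q, k, v = map(lambda x: x.transpose(1, 2), (q, k, v))
# After: the generator expression says the same thing without the lambda.
q, k, v = (x.transpose(1, 2) for x in (q, k, v))

# An index that is never used is conventionally spelled _:
total = torch.zeros(())
for _, block in enumerate((q, k, v)):  # index kept only to mirror the original loop
    total = total + block.sum()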
6 changes: 0 additions & 6 deletions build/model_aoti.py
@@ -1,12 +1,6 @@
from ctypes import c_void_p

import torch
import torch.nn as nn
from torch import empty
from torch._dynamo.testing import rand_strided
from torch._inductor.codecache import AsyncCompile
from torch._inductor.utils import print_performance
from torch._inductor.wrapper_benchmark import compiled_module_main

# with open("./dso_model.h", "rb") as f:
# dso_src = f.read().decode("utf-8")
3 changes: 0 additions & 3 deletions build/model_et.py
@@ -1,9 +1,6 @@
from ctypes import c_void_p

import torch
import torch.nn as nn
from executorch.extension.pybindings import portable_lib as exec_lib
from torch import empty


class PTEModel(nn.Module):
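The model_aoti.py and model_et.py hunks above are plain unused-import removals (the pyflakes F401 family of findings), handled here as part of the manual cleanup. As a rough illustration of what such a check does, here is a tiny AST-based sketch; it is not the project's actual lint tooling and only handles simple cases:

import ast

def unused_imports(source: str) -> list:
    # Very rough sketch: imported names that are never referenced in the module.
    tree = ast.parse(source)
    imported, used = set(), set()
    for node in ast.walk(tree):
        if isinstance(node, (ast.Import, ast.ImportFrom)):
            for alias in node.names:
                imported.add(alias.asname or alias.name.split(".")[0])
        elif isinstance(node, ast.Name):
            used.add(node.id)
    return sorted(imported - used)

snippet = "from ctypes import c_void_p\nimport torch\nx = torch.zeros(1)\n"
print(unused_imports(snippet))  # ['c_void_p']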
7 changes: 5 additions & 2 deletions eval.py
@@ -165,7 +165,7 @@ def _model_generate(self, context, max_length, eos_token_id):
def eval(
model: Transformer,
tokenizer,
-tasks: list = ["hellaswag"],
+tasks: Optional[list] = None,
limit: Optional[int] = None,
max_seq_length: Optional[int] = None,
) -> dict:
@@ -182,6 +182,9 @@ def eval(
Returns:
eval_results (dict): A dictionary of evaluation results for the specified task(s).
"""
+if tasks is None:
+    tasks = ["hellaswag"]

model_eval_wrapper = GPTFastEvalWrapper(
model,
tokenizer,
@@ -195,7 +198,7 @@

if "hendrycks_test" in tasks:
tasks.remove("hendrycks_test")
-tasks += [x for x in lm_eval.tasks.hendrycks_test.create_all_tasks().keys()]
+tasks += list(lm_eval.tasks.hendrycks_test.create_all_tasks().keys())
task_dict = get_task_dict(tasks)

eval_results = evaluate(
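The change to the eval() signature above is the standard fix for a mutable default argument (B006 in flake8-bugbear terms; the exact rule is an assumption): a list built in the def line is created once and shared by every call, so the None sentinel defers creation to call time. The nearby list(...) change is the related unnecessary-comprehension cleanup over .keys(). A minimal sketch of why the default matters, with a hypothetical run_tasks function:

from typing import Optional

def run_tasks_buggy(tasks: list = []) -> list:
    # One list object is created when the function is defined and is then
    # reused by every call, so state leaks between calls.
    tasks.append("hellaswag")
    return tasks

def run_tasks(tasks: Optional[list] = None) -> list:
    if tasks is None:
        tasks = ["hellaswag"]  # fresh list on every call
    return tasks

print(run_tasks_buggy())  # ['hellaswag']
print(run_tasks_buggy())  # ['hellaswag', 'hellaswag'], leaked from the first call
print(run_tasks())        # ['hellaswag']
print(run_tasks())        # ['hellaswag']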
11 changes: 1 addition & 10 deletions export_aoti.py
@@ -4,20 +4,11 @@
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import itertools
import sys
import time
from pathlib import Path
from typing import Optional, Tuple

import torch
import torch.nn as nn

from build.model import Transformer

from generate import decode_one_token
from quantize import quantize_model
-from torch.export import Dim, export
+from torch.export import Dim

default_device = "cpu" # 'cuda' if torch.cuda.is_available() else 'cpu'

7 changes: 1 addition & 6 deletions export_et.py
@@ -4,11 +4,8 @@
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import time
from pathlib import Path

import torch
import torch.nn as nn
from build.model import Transformer

from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
@@ -28,10 +25,8 @@
# )
from executorch_portable_utils import export_to_edge

from generate import decode_one_token
-from quantize import get_precision, name_to_dtype, quantize_model, set_precision
+from quantize import get_precision
from torch._export import capture_pre_autograd_graph
from torch.export import Dim, export


default_device = "cpu" # 'cuda' if torch.cuda.is_available() else 'cpu'
6 changes: 3 additions & 3 deletions scripts/convert_hf_checkpoint.py
@@ -22,11 +22,11 @@
@torch.inference_mode()
def convert_hf_checkpoint(
*,
-checkpoint_dir: Path = Path(
-    "checkpoints/meta-Transformer/Transformer-2-7b-chat-hf"
-),
+checkpoint_dir: Optional[Path] = None,
model_name: Optional[str] = None,
) -> None:
+if checkpoint_dir is None:
+    checkpoint_dir = Path("checkpoints/meta-Transformer/Transformer-2-7b-chat-hf")
if model_name is None:
model_name = checkpoint_dir.name
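The convert_hf_checkpoint change above is the same sentinel idiom applied to a function call in a default value (a B008-style finding; again, which rule fired is an assumption): Path(...) in the def line runs once when the function is defined, while the None default defers it to each call. A tiny sketch with a hypothetical load_checkpoint_dir helper:

from pathlib import Path
from typing import Optional

def load_checkpoint_dir(checkpoint_dir: Optional[Path] = None) -> Path:
    # Deferring the Path(...) call means it runs on every call, not once at
    # function definition time.
    if checkpoint_dir is None:
        checkpoint_dir = Path("checkpoints/meta-Transformer/Transformer-2-7b-chat-hf")
    return checkpoint_dir

print(load_checkpoint_dir())  # checkpoints/meta-Transformer/Transformer-2-7b-chat-hf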

