Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-org export code #9353

Merged
merged 27 commits into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
d0952e1
reorg the export code
oyilmaz-nvidia May 31, 2024
7911c52
Apply isort and black reformatting
oyilmaz-nvidia May 31, 2024
5eb797c
replaced log with raise
oyilmaz-nvidia Jun 3, 2024
f5be3e4
Merge branch 'onur/reorg_export' of https://github.com/oyilmaz-nvidia…
oyilmaz-nvidia Jun 3, 2024
2198471
add converter and loader folders
oyilmaz-nvidia Jun 4, 2024
db706c9
move nemo_ckpt_convert into the converter folder
oyilmaz-nvidia Jun 4, 2024
04637f5
move nemo_file into loader folder
oyilmaz-nvidia Jun 4, 2024
2716670
reorg converter
oyilmaz-nvidia Jun 4, 2024
1c8a54c
Apply isort and black reformatting
oyilmaz-nvidia Jun 4, 2024
ad08ca8
continue to reorg converter
oyilmaz-nvidia Jun 4, 2024
0402bd6
Merge branch 'onur/reorg_export' of https://github.com/oyilmaz-nvidia…
oyilmaz-nvidia Jun 4, 2024
58fbdcb
Apply isort and black reformatting
oyilmaz-nvidia Jun 4, 2024
70e17b9
continue to reorg
oyilmaz-nvidia Jun 4, 2024
12d4dd2
Merge branch 'onur/reorg_export' of https://github.com/oyilmaz-nvidia…
oyilmaz-nvidia Jun 4, 2024
2050be0
move nemo file back into nemo folder
oyilmaz-nvidia Jun 5, 2024
6f32d0f
renamed nemo folder to nemo_ckpt_loader
oyilmaz-nvidia Jun 5, 2024
b09dccb
remove unused function
oyilmaz-nvidia Jun 5, 2024
753b654
removed nemo file
oyilmaz-nvidia Jun 5, 2024
387e36e
Apply isort and black reformatting
oyilmaz-nvidia Jun 5, 2024
55cb60c
moved a function to tensorrt_llm_run file
oyilmaz-nvidia Jun 5, 2024
3536130
Merge branch 'onur/reorg_export' of https://github.com/oyilmaz-nvidia…
oyilmaz-nvidia Jun 5, 2024
5aabced
Apply isort and black reformatting
oyilmaz-nvidia Jun 5, 2024
410daef
Remove unused imports
oyilmaz-nvidia Jun 5, 2024
e2655a8
Merge branch 'main' into onur/reorg_export
oyilmaz-nvidia Jun 5, 2024
dc9dced
Apply isort and black reformatting
oyilmaz-nvidia Jun 5, 2024
653dab9
import csv added
oyilmaz-nvidia Jun 6, 2024
ad286ff
Merge branch 'main' into onur/reorg_export
oyilmaz-nvidia Jun 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions nemo/export/tensorrt_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@

from nemo.deploy import ITritonDeployable
from nemo.export.tarutils import TarPath, unpack_tarball
from nemo.export.trt_llm.nemo_utils import get_tokenzier, is_nemo_file, nemo_to_trtllm_config
from nemo.export.trt_llm.loader.nemo_file import get_tokenzier, load_nemo_model
from nemo.export.trt_llm.nemo_utils import is_nemo_file, model_to_trtllm_ckpt
from nemo.export.trt_llm.qnemo import qnemo_to_tensorrt_llm
from nemo.export.trt_llm.qnemo.tokenizer_utils import get_nmt_tokenizer
from nemo.export.trt_llm.tensorrt_llm_build import build_and_save_engine
Expand Down Expand Up @@ -225,15 +226,16 @@ def export(
lora_target_modules=lora_target_modules,
)
else:
weights_dicts, model_configs, self.tokenizer = nemo_to_trtllm_config(
in_file=nemo_checkpoint_path,
model, model_configs, self.tokenizer = load_nemo_model(nemo_checkpoint_path, nemo_export_dir)
weights_dicts, model_configs = model_to_trtllm_ckpt(
model=model,
nemo_model_config=model_configs,
nemo_export_dir=nemo_export_dir,
decoder_type=model_type,
dtype=dtype,
tensor_parallel_size=tensor_parallel_size,
pipeline_parallel_size=pipeline_parallel_size,
use_parallel_embedding=use_parallel_embedding,
nemo_export_dir=nemo_export_dir,
save_nemo_model_config=save_nemo_model_config,
)

for weight_dict, model_config in zip(weights_dicts, model_configs):
Expand Down
13 changes: 13 additions & 0 deletions nemo/export/trt_llm/converter/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,24 @@


import configparser
import json
import logging
import math
import multiprocessing
import os
import typing

from collections import defaultdict
from pathlib import Path
from typing import Union

import numpy as np
import tensorstore # This is important even though not used. Otherwise zarr raises error.
import torch
import zarr
from tensorrt_llm._utils import np_bfloat16, pad_vocab_size, str_dtype_to_torch, torch_to_numpy
from torch.distributed.checkpoint import FileSystemReader, TensorStorageMetadata
from torch.distributed.checkpoint.state_dict_loader import load_state_dict
from torch.distributed.checkpoint import FileSystemReader
from tqdm import tqdm
from transformers import AutoTokenizer, GPT2Tokenizer, LlamaConfig
from transformers import LlamaConfig

from nemo.export.tarutils import TarPath, ZarrPathStore
from nemo.export.trt_llm.nemo.convert import split_and_save_weight
from nemo.export.trt_llm.nemo.nemo import UnpackedNemoCheckpointDir, extract_layers_with_prefix, nemo_to_llm_config
from nemo.export.trt_llm.nemo.sentencepiece_tokenizer import SentencePieceTokenizer
from nemo.export.tarutils import TarPath
from nemo.export.trt_llm.converter.utils import split_and_save_weight
from nemo.export.trt_llm.nemo.nemo import extract_layers_with_prefix, nemo_to_llm_config

LOGGER = logging.getLogger("NeMo")

Expand Down Expand Up @@ -124,17 +118,6 @@ def rename_key_dist_ckpt(old_key: str, layer: int):
return new_key


def load_sharded_metadata(checkpoint_dir: Union[Path, TarPath], torch_tensor=True):
    """Load a distributed checkpoint using the backend named in its metadata.

    Reads ``metadata.json`` from ``checkpoint_dir`` and dispatches on its
    ``sharded_backend`` entry to the matching loader.

    Args:
        checkpoint_dir: Directory (``Path`` or ``TarPath``) holding the checkpoint.
        torch_tensor: Forwarded unchanged to the backend-specific loader.

    Returns:
        Whatever the selected backend loader returns.

    Raises:
        NotImplementedError: If ``sharded_backend`` is neither ``zarr`` nor
            ``torch_dist``.
    """
    metadata_path = checkpoint_dir / 'metadata.json'
    with metadata_path.open(mode='r') as metadata_file:
        metadata = json.load(metadata_file)

    backend = metadata['sharded_backend']
    if backend == 'zarr':
        return load_sharded_metadata_zarr(checkpoint_dir, torch_tensor)
    if backend == 'torch_dist':
        return load_sharded_metadata_torch_dist(checkpoint_dir, torch_tensor)
    raise NotImplementedError(f'Distributed checkpoint backend {backend} not supported')


class TarFileSystemReader(FileSystemReader):
"""Reader that accepts both Path and TarPath checkpoint directory.

Expand All @@ -148,65 +131,24 @@ def __init__(self, path: Union[Path, TarPath]) -> None:
self.storage_data = dict()


def load_sharded_metadata_torch_dist(checkpoint_dir: Union[Path, TarPath], torch_tensor=True):
    """Load every tensor entry of a ``torch_dist`` checkpoint into a dict.

    Args:
        checkpoint_dir: Directory (``Path`` or ``TarPath``) holding the checkpoint.
        torch_tensor: When true the values stay torch tensors; otherwise each
            value is converted to a numpy array.

    Returns:
        Dict mapping checkpoint keys to the loaded tensors/arrays. Only entries
        whose metadata is a ``TensorStorageMetadata`` are included.
    """
    reader = TarFileSystemReader(checkpoint_dir)
    entries = reader.read_metadata().state_dict_metadata

    # Pre-allocate one empty tensor per tensor entry; load_state_dict fills them.
    state_dict = {}
    for name, entry in entries.items():
        if isinstance(entry, TensorStorageMetadata):
            state_dict[name] = torch.empty(entry.size, dtype=entry.properties.dtype)

    load_state_dict(state_dict, storage_reader=reader, no_dist=True)

    if torch_tensor:
        return state_dict

    def _as_numpy(tensor):
        # bfloat16 tensors are reinterpreted as int16 for the torch -> numpy
        # hand-off and then viewed as np_bfloat16; the bits are not converted.
        if tensor.dtype == torch.bfloat16:
            return tensor.view(torch.int16).numpy().view(np_bfloat16)
        return tensor.numpy()

    for name in list(state_dict):
        state_dict[name] = _as_numpy(state_dict[name])
    return state_dict


def load_sharded_metadata_zarr(checkpoint_dir: Union[Path, TarPath], torch_tensor=True):
    """Load every zarr array found directly under ``checkpoint_dir``.

    A child counts as an array when it is a directory containing a ``.zarray``
    file; its directory name becomes the key.

    Args:
        checkpoint_dir: Directory (``Path`` or ``TarPath``) holding the checkpoint.
        torch_tensor: When true the values are torch tensors; otherwise the raw
            arrays read from zarr are returned.

    Returns:
        Dict mapping array directory names to the loaded data.
    """
    sharded_state_dict = {}
    for entry in checkpoint_dir.iterdir():
        holds_zarr_array = entry.is_dir() and (entry / '.zarray').exists()
        if not holds_zarr_array:
            continue

        array = zarr.open(ZarrPathStore(entry), 'r')
        data = array[:]

        if not torch_tensor:
            sharded_state_dict[entry.name] = data
        elif array.dtype.name == "bfloat16":
            # Reinterpret the 16-bit payload as int16 for the numpy -> torch
            # hand-off, then view it as bfloat16; the bits are not converted.
            sharded_state_dict[entry.name] = torch.from_numpy(data.view(np.int16)).view(torch.bfloat16)
        else:
            sharded_state_dict[entry.name] = torch.from_numpy(data)

    return sharded_state_dict


@torch.no_grad()
def convert_dist_checkpoint(unpacked_checkpoints_dir: UnpackedNemoCheckpointDir, args):
nemo_model_config = unpacked_checkpoints_dir.model_config
checkpoints_path = unpacked_checkpoints_dir.checkpoints_dir / "model_weights"
def convert_model_trt_llm_ckpt(
nemo_model_config,
model,
nemo_export_dir,
storage_type,
inference_tp_size,
decoder_type,
use_parallel_embedding,
processes,
):

# if checkpoints files could be found - start preparing output dir
out_dir = create_out_dir(args)

storage_type = str_dtype_to_torch(args.storage_type)
out_dir = create_nemo_export_dir(nemo_export_dir)
storage_type = str_dtype_to_torch(storage_type)
is_mcore = nemo_model_config.get("mcore_gpt", False)

# load position_embedding from rank 0
model = load_sharded_metadata(checkpoints_path)
model_state_dict = model.get("state_dict", model)

prefix, transformer_layer_prefix = get_layer_prefix(model_state_dict.keys(), is_mcore)
Expand All @@ -220,12 +162,11 @@ def convert_dist_checkpoint(unpacked_checkpoints_dir: UnpackedNemoCheckpointDir,
num_layers = nemo_model_config["num_layers"]
training_tp_size = 1
training_pp_size = 1
inference_tp_size = args.tensor_parallelism
num_kv_heads = nemo_model_config.get("num_query_groups", 0)
multi_query_mode = nemo_model_config.get("multi_query_mode", False)
num_attention_heads = nemo_model_config["num_attention_heads"]
kv_channels = nemo_model_config.get("kv_channels", None)
use_parallel_embedding = args.use_parallel_embedding

if num_kv_heads == 0:
if multi_query_mode:
num_kv_heads = 1
Expand All @@ -237,7 +178,7 @@ def convert_dist_checkpoint(unpacked_checkpoints_dir: UnpackedNemoCheckpointDir,
"tp_size": training_tp_size,
"split_gated_activation": nemo_model_config.get("activation", "gelu")
in ["swiglu", "geglu", "fast-swiglu", "fast-geglu"]
and (args.decoder_type == "gptnext" or is_mcore),
and (decoder_type == "gptnext" or is_mcore),
"num_attention_heads": num_attention_heads,
"num_kv_heads": num_kv_heads,
"kv_channels": kv_channels,
Expand Down Expand Up @@ -326,8 +267,8 @@ def handle_model_level_weights(model, tp_idx: int, pp_idx: int):

starmap_args = tqdm(starmap_args, desc="saving weights")

if args.processes > 1:
with multiprocessing.Pool(args.processes) as pool:
if processes > 1:
with multiprocessing.Pool(processes) as pool:
weights_dicts = pool.starmap(split_and_save_weight, starmap_args)
weights_dict_local = {k: v for d in weights_dicts for k, v in d.items()}
else:
Expand All @@ -339,21 +280,9 @@ def handle_model_level_weights(model, tp_idx: int, pp_idx: int):

for key, values in model_level_weights.items():
model_level_weights[key] = np.concatenate(values, axis=0)

weights_dict[key] = model_level_weights[key]

if nemo_model_config["tokenizer"].get("library", None) == "huggingface":
tokenizer = AutoTokenizer.from_pretrained(
nemo_model_config["tokenizer"]["type"], use_fast=nemo_model_config["tokenizer"].get("use_fast", False)
)
else:
tokenizer_config = update_tokenizer_paths(nemo_model_config["tokenizer"], unpacked_checkpoints_dir)
copy_tokenizer_files(tokenizer_config, out_dir)

tokenizer_config["model"] = os.path.join(out_dir, "tokenizer.model")
tokenizer = build_tokenizer(tokenizer_config)

return weights_dict, nemo_model_config, tokenizer
return weights_dict


@torch.no_grad()
Expand Down Expand Up @@ -554,94 +483,8 @@ def _handle_weights(src_key: str, dst_key: str, pp_src_idx: int, tensor_dim: int
return trt_inflight_weights, llm_config


def create_out_dir(args):
out_dir = Path(args.out_dir)
def create_nemo_export_dir(nemo_export_dir):
    """Ensure the export directory exists and return it as a ``Path``.

    Args:
        nemo_export_dir: Directory path (string or path-like) to create.

    Returns:
        ``Path`` for ``nemo_export_dir``; missing parent directories are
        created as well.

    Raises:
        FileExistsError: If the path already exists but is not a directory.
    """
    out_dir = Path(nemo_export_dir)
    # exist_ok=True replaces the separate exists() check, so a directory
    # created concurrently between check and mkdir no longer raises.
    out_dir.mkdir(parents=True, exist_ok=True)
    return out_dir


def update_tokenizer_paths(tokenizer_config: typing.Dict, unpacked_checkpoints_dir):
    """Repoint tokenizer file entries at files found in the unpacked checkpoint.

    The ``model``, ``vocab_file`` and ``merge_file`` entries are each handled
    the same way: if ``unpacked_checkpoints_dir.get_tokenizer_file_path``
    returns a match, the entry is replaced with it; otherwise, if the
    configured path does not exist, the entry is set to ``None``. Entries that
    are absent or already ``None`` are left untouched.

    Args:
        tokenizer_config: Tokenizer section of the model config; modified in place.
        unpacked_checkpoints_dir: Object providing
            ``get_tokenizer_file_path(tokenizer_key, file_key, file_pattern)``.

    Returns:
        The same ``tokenizer_config`` dict that was passed in.
    """

    def _update_config_entry(key, file_pattern):
        # .get() rather than [key]: a config that omits one of the optional
        # file entries is skipped instead of raising KeyError.
        old_path = tokenizer_config.get(key)
        if old_path is None:
            return
        old_path = Path(old_path)
        new_path = unpacked_checkpoints_dir.get_tokenizer_file_path("tokenizer", key, file_pattern)
        if new_path:
            LOGGER.debug(f"Update tokenizer {key} {old_path} -> {new_path}")
            tokenizer_config[key] = new_path
        elif not old_path.exists():
            LOGGER.warning(f"Tokenizer {key}'s path {old_path} does not exists: set it to None")
            tokenizer_config[key] = None

    _update_config_entry("model", "*.model")
    _update_config_entry("vocab_file", "*vocab*")
    _update_config_entry("merge_file", "*merge*.txt")

    return tokenizer_config


def copy_tokenizer_files(config, out_dir):
    """Copy the tokenizer files named in ``config`` into ``out_dir``.

    The ``model``, ``vocab_file`` and ``merge_file`` entries are copied to
    ``tokenizer<suffix>``, ``vocab<suffix>`` and ``merges<suffix>``
    respectively, keeping the source file's suffix. Entries that are absent,
    ``None``, or point at a non-existent path are skipped.

    Args:
        config: Tokenizer config dict; values may be strings or path objects.
        out_dir: Destination directory; must support the ``/`` path operator.
    """
    # Local import: only this function needs shutil.
    import shutil

    basenames = {
        "model": "tokenizer",
        "vocab_file": "vocab",
        "merge_file": "merges",
    }

    for key, basename in basenames.items():
        # .get() rather than [key]: configs that omit an optional entry are
        # skipped instead of raising KeyError.
        path = config.get(key)
        if path is None:
            continue

        if isinstance(path, str):
            path = Path(path)

        if not path.exists():
            LOGGER.debug(f"Tokenizer {key}: {path} file not found")
            continue

        dst_path = out_dir / f"{basename}{path.suffix}"
        LOGGER.debug(f"Copy tokenizer {key}: {path}->{dst_path}")

        # 'path' may be a TarPath, so go through its own open() instead of
        # shutil.copy(path, ...); copyfileobj streams in chunks rather than
        # reading the whole file into memory.
        with path.open('rb') as infile, open(dst_path, 'wb') as outfile:
            shutil.copyfileobj(infile, outfile)


def build_tokenizer(tokenizer):
    """Build a tokenizer from a config dict, or adapt an existing tokenizer object.

    Dict input:
        * ``library == "sentencepiece"`` returns a ``SentencePieceTokenizer``
          built from ``config["model"]``.
        * a ``type`` containing ``"GPT2"`` returns a ``GPT2Tokenizer`` built
          from ``vocab_file``/``merge_file``, with ``<s>``/``</s>`` added when
          no BOS/EOS token id is set.
        * anything else raises ``ValueError``.

    Non-dict input:
        A NeMo ``TokenizerSpec`` instance gets ``bos_token_id``,
        ``eos_token_id`` and ``encode`` attached, and ``batch_decode`` is
        installed on the ``TokenizerSpec`` class. Other objects are returned
        unchanged.

    Args:
        tokenizer: Tokenizer config dict or an already-built tokenizer object.

    Returns:
        The built or adapted tokenizer.

    Raises:
        ValueError: Dict input with an unsupported library/type.
        TypeError: Adapting a non-dict tokenizer failed; the original error is
            chained as ``__cause__``.
    """
    if isinstance(tokenizer, dict):
        tokenizer_config = tokenizer
        if tokenizer_config["library"] == "sentencepiece":
            return SentencePieceTokenizer(model_path=tokenizer_config["model"])

        # A missing or None "type" falls through to the ValueError below
        # instead of failing on the membership test.
        if "GPT2" not in (tokenizer_config.get("type") or ""):
            raise ValueError(f'Tokenizer type {tokenizer_config["library"]} not handled')

        tokenizer = GPT2Tokenizer(tokenizer_config["vocab_file"], tokenizer_config["merge_file"])
        if tokenizer.bos_token_id is None:
            tokenizer.add_special_tokens({"bos_token": "<s>"})
        if tokenizer.eos_token_id is None:
            tokenizer.add_special_tokens({"eos_token": "</s>"})
        return tokenizer

    try:
        # If NeMo tokenizer, monkey patch interface
        from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec

        if isinstance(tokenizer, TokenizerSpec):

            def batch_decode_patch(self, ids):
                if torch.is_tensor(ids):
                    ids = ids.cpu().numpy()
                return self.ids_to_text(ids)

            tokenizer.bos_token_id = tokenizer.bos_id
            tokenizer.eos_token_id = tokenizer.eos_id
            tokenizer.encode = tokenizer.text_to_ids
            # Installed on the class, so it applies to every TokenizerSpec.
            TokenizerSpec.batch_decode = batch_decode_patch
    except Exception as err:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate; chaining keeps the underlying failure visible.
        raise TypeError(f'Unsupported tokenizer build input: {type(tokenizer)}') from err

    return tokenizer
13 changes: 13 additions & 0 deletions nemo/export/trt_llm/loader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading
Loading