diff --git a/nemo/export/trt_llm/tensorrt_llm_build.py b/nemo/export/trt_llm/tensorrt_llm_build.py index cdf8eaac6b1c..4be2d42ebe4d 100755 --- a/nemo/export/trt_llm/tensorrt_llm_build.py +++ b/nemo/export/trt_llm/tensorrt_llm_build.py @@ -118,6 +118,14 @@ def build_and_save_engine( build_config.lora_config = lora_config model = model_cls.from_config(model_config) + if not model_config.bias and model_config.architecture == 'GPTForCausalLM': + # NOTE: GPT models in megatron-core that set bias=False set the bias to False globally, + # whereas bias=False in TRTLLM GPT models sets it to False everywhere except + # LayerNorm. This change makes TRTLLM's implementation match megatron-core. + for name, module in model.named_modules(): + if isinstance(module, tensorrt_llm.layers.normalization.LayerNorm): + module.bias = None + module.register_parameter('bias', None) model = optimize_model( model, use_parallel_embedding=model_config.use_parallel_embedding,