diff --git a/src/transformers/models/jamba/modeling_jamba.py b/src/transformers/models/jamba/modeling_jamba.py index 5449c1fb97d48f..c6e8d425459fe0 100755 --- a/src/transformers/models/jamba/modeling_jamba.py +++ b/src/transformers/models/jamba/modeling_jamba.py @@ -210,6 +210,7 @@ class HybridMambaAttentionDynamicCache(DynamicCache): """ def __init__(self, config, batch_size, dtype=torch.float16, device=None): + super().__init__() self.dtype = dtype self.layers_block_type = config.layers_block_type self.has_previous_state = False # only used by mamba