diff --git a/plugins/accelerated-peft/README.md b/plugins/accelerated-peft/README.md
index ce37e2e7..fc2cf624 100644
--- a/plugins/accelerated-peft/README.md
+++ b/plugins/accelerated-peft/README.md
@@ -11,7 +11,7 @@ Plugin | Description | Depends | Loading | Augmentation | Callbacks
### Key Points
-- fix upcasting (resulting in slowdown) issue for `bnb` plugin, originally discovered by inventors of [Unsloth](https://unsloth.ai/blog/mistral-benchmark).
+- fix upcasting (resulting in slowdown) issue for `bnb` plugin, originally discovered by the inventors of [Unsloth](https://unsloth.ai/blog/mistral-benchmark). **NOTE**: per our benchmarks, we recommend enabling *mixed precision* when training with 4bit quant for better performance (a minimal sketch follows this list).
- `bnb` properly configured to work with FSDP following [this guide](https://huggingface.co/docs/bitsandbytes/main/en/fsdp_qlora).
- `triton_v2` kernels are not yet properly integrated into huggingface optimum.
- `triton_v2` kernels are [the only 4bit kernels that work for training](https://github.com/AutoGPTQ/AutoGPTQ/issues/633).
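For context on the mixed precision recommendation in the first key point, a minimal sketch of what it means in practice. This is illustrative only, assuming the standard Hugging Face `TrainingArguments` interface and bf16-capable hardware; it is not code from this repository:

```python
# Minimal sketch (assumes the standard HF TrainingArguments API, not this repo's code).
# The `--bf16` / `--fp16` CLI flags map to these fields; the bnb plugin's
# augmentation step asserts that one of them is set (see the hunk below).
from transformers import TrainingArguments

train_args = TrainingArguments(
    output_dir="out",
    bf16=True,  # mixed precision; fp16=True is the alternative on non-bf16 hardware
    learning_rate=2e-4,
)
assert train_args.bf16 or train_args.fp16, "4bit quant training expects --bf16 or --fp16"
```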
diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_bnb.py b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_bnb.py
index ad3bb5cb..1aadc08f 100644
--- a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_bnb.py
+++ b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_bnb.py
@@ -230,6 +230,11 @@ def augmentation(
train_args: TrainingArguments,
modifiable_args: Tuple[LoraConfig],
):
+        # - when using our prepare-peft path, we enforce the mixed precision settings
+ assert (
+ train_args.bf16 is True or train_args.fp16 is True
+ ), f"{self.__class__} requires mixed precision argument `--fp16` or `--bf16`"
+
(peft_config,) = modifiable_args # unpack modifiable args
# some assertions
diff --git a/plugins/fused-ops-and-kernels/README.md b/plugins/fused-ops-and-kernels/README.md
index 4aeb4c50..dcd607e2 100644
--- a/plugins/fused-ops-and-kernels/README.md
+++ b/plugins/fused-ops-and-kernels/README.md
@@ -13,18 +13,20 @@ This library contains fused operations and custom kernels, to be expanded over t
Plugin | Description | Depends | Loading | Augmentation | Callbacks
--|--|--|--|--|--
-[fast_quantized_peft](./src/fms_accelerate_foak/framework_plugin_fast_quantized_peft.py) | LoRA fused ops, fast cross-entropy, fast rms, fast RoPE | Contains extracted code | | ✅
+[fast_quantized_peft](./src/fms_accelerate_foak/framework_plugin_fast_quantized_peft.py) | LoRA fused ops, fast cross-entropy, fast rms, fast RoPE (**Disabled**) | Contains extracted code | | ✅
[fast_kernels](./src/fms_accelerate_foak/framework_plugin_fast_kernels.py) | Enhanced version of `fast_quantized_peft`, also works for full-FT and non-quant peft | Contains extracted code | | ✅
### Supported DataType Settings
**Compatibility Matrix with Mixed Precision**
torch_dtype | Mixed Precision | Full-FT-FOAK | PEFT-FOAK | QPEFT-FOAK
-- | -- | -- | -- | --
-FLOAT16 | - | ✗ Not Allowed | ✗| ✗
+FLOAT16 | - | **Compatible** | **Compatible** | ✗
FLOAT16 | FP16 | ValueError: <br>Attempting to <br>unscale FP16 gradients. <br>[See here](https://github.com/huggingface/peft/blob/main/docs/source/developer_guides/troubleshooting.md) | **Compatible** | **Compatible**
-BFLOAT16 | - | ✗ | ✗ | ✗
+BFLOAT16 | - | **Compatible** | **Compatible** | ✗
BFLOAT16 | BF16 | **Compatible** | **Compatible** | [Less Performant](https://github.com/foundation-model-stack/fms-acceleration/issues/84)
+NOTE: this chart is also a good reference for supported data types, even in the non-FOAK case (see the sketch below).
+
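As a reading aid for the matrix above, a minimal sketch encoding the allowed combinations as a lookup. This helper is hypothetical and illustrative only; the plugin does not ship it:

```python
# Illustrative encoding of the compatibility matrix above (hypothetical helper).
# Keys are (torch_dtype, mixed_precision) pairs; values are the modes that work.
ALLOWED = {
    ("float16", None):    {"full-ft", "peft"},           # qpeft not allowed
    ("float16", "fp16"):  {"peft", "qpeft"},             # full-FT raises "Attempting to unscale FP16 gradients"
    ("bfloat16", None):   {"full-ft", "peft"},
    ("bfloat16", "bf16"): {"full-ft", "peft", "qpeft"},  # qpeft works, but is less performant
}

def is_supported(torch_dtype, mixed_precision, mode):
    return mode in ALLOWED.get((torch_dtype, mixed_precision), set())
```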
### Code Extracted from Unsloth
diff --git a/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/framework_plugin_fast_kernels.py b/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/framework_plugin_fast_kernels.py
index cb39d4e6..16ea64b7 100644
--- a/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/framework_plugin_fast_kernels.py
+++ b/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/framework_plugin_fast_kernels.py
@@ -126,10 +126,14 @@ def augmentation(
train_args: TrainingArguments,
modifiable_args: Tuple[LoraConfig],
):
- # assert that plugin requires mixed precision to be set
- assert (
- train_args.bf16 is True or train_args.fp16 is True
- ), f"{self.__class__} requires mixed precision argument `--fp16` or `--bf16`"
+ has_quant = getattr(model, "quantization_method", None)
+
+        if has_quant is not None:
+            # - we enforce the mixed precision settings only in the quantized case
+            # - this is mostly for the fused-lora kernels
+ assert (
+ train_args.bf16 is True or train_args.fp16 is True
+ ), f"{self.__class__} requires mixed precision argument `--fp16` or `--bf16`"
        # This is designed to be a passthrough if the training scenario is
        # full finetuning or standard peft; fused-lora rules apply only to qpeft
@@ -138,7 +142,7 @@ def augmentation(
# some logic to omit terms from the filter if logic precludes
omitted = set()
- if getattr(model, "quantization_method", None) is None:
+ if has_quant is None:
# - fused_lora only required for quant-peft
omitted.add("fused_lora")
diff --git a/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/fused_ops/unsloth_lora/bnb/fast_lora.py b/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/fused_ops/unsloth_lora/bnb/fast_lora.py
index 5ca4f8bf..63d7dcb7 100644
--- a/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/fused_ops/unsloth_lora/bnb/fast_lora.py
+++ b/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/fused_ops/unsloth_lora/bnb/fast_lora.py
@@ -69,11 +69,11 @@ def forward(ctx, X : torch.Tensor,
e = matmul_lora(X, gateW, gateW_quant, gateA, gateB, gateS, dropout=dropout_gate)
g = matmul_lora(X, upW, upW_quant, upA, upB, upS, dropout=dropout_up)
- e += gate_bias
- g += up_bias
+ if gate_bias is not None: e += gate_bias
+ if up_bias is not None: g += up_bias
h = _forward_function(e, g)
i = matmul_lora(h, downW, downW_quant, downA, downB, downS, dropout=dropout_down)
- i += down_bias
+ if down_bias is not None: i += down_bias
# Extract post-dropout X for use in backward computation
_dropped_X = []
@@ -261,9 +261,9 @@ def forward(ctx, X : torch.Tensor,
K = matmul_lora(X, KW, KW_quant, KA, KB, KS, dropout=dropout_K)
V = matmul_lora(X, VW, VW_quant, VA, VB, VS, dropout=dropout_V)
- Q += Q_bias
- K += K_bias
- V += V_bias
+ if Q_bias is not None: Q += Q_bias
+ if K_bias is not None: K += K_bias
+ if V_bias is not None: V += V_bias
# Extract post-dropout X for use in backward computation
_dropped_X = []
@@ -406,7 +406,7 @@ def forward(ctx, X : torch.Tensor,
W, W_quant, bias, A, B, S, dropout_O):
dtype = X.dtype
XW = matmul_lora(X, W, W_quant, A, B, S, dropout=dropout_O)
- XW += bias
+ if bias is not None: XW += bias
# Extract post-dropout X for use in backward computation
if dropout_O is not None:
diff --git a/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/fused_ops/unsloth_lora/gptq/fast_lora.py b/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/fused_ops/unsloth_lora/gptq/fast_lora.py
index b8e1cfcb..2f0fc89c 100644
--- a/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/fused_ops/unsloth_lora/gptq/fast_lora.py
+++ b/plugins/fused-ops-and-kernels/src/fms_acceleration_foak/fused_ops/unsloth_lora/gptq/fast_lora.py
@@ -247,8 +247,8 @@ def forward(
e = matmul_lora(X, gateW, gateA, gateB, gateS, dropout=dropout_gate)
upW = dequant248(up_qweight, up_scales, up_qzeros, up_g_idx, up_bits)
g = matmul_lora(X, upW, upA, upB, upS, dropout=dropout_up)
- e += gate_bias
- g += up_bias
+ if gate_bias is not None: e += gate_bias
+ if up_bias is not None: g += up_bias
# f = torch.nn.functional.silu(e)
# h = f * g
h = swiglu_fg_kernel(e, g)
@@ -257,7 +257,7 @@ def forward(
down_qweight, down_scales, down_qzeros, down_g_idx, down_bits
)
i = matmul_lora(h, downW, downA, downB, downS, dropout=dropout_down)
- i += down_bias
+ if down_bias is not None: i += down_bias
ctx.custom_saved_tensors = (
gate_qweight,
@@ -529,9 +529,9 @@ def forward(
K = matmul_lora(X, KW, KA, KB, KS, dropout=dropout_K)
V = matmul_lora(X, VW, VA, VB, VS, dropout=dropout_V)
- Q += Q_bias
- K += K_bias
- V += V_bias
+ if Q_bias is not None: Q += Q_bias
+ if K_bias is not None: K += K_bias
+ if V_bias is not None: V += V_bias
ctx.custom_saved_tensors = (
Q_qweight,
@@ -774,7 +774,7 @@ def forward(
):
W = dequant248(O_qweight, O_scales, O_qzeros, O_g_idx, O_bits)
XW = matmul_lora(X, W, A, B, S, dropout=dropout_O)
- XW += O_bias
+ if O_bias is not None: XW += O_bias
del W
ctx.custom_saved_tensors = (
O_qweight,
@@ -843,6 +843,6 @@ def apply_lora_o(self, X):
# added by flim@sg.ibm.com
# this version can be directly patched on the output linear
def apply_lora_o_v2(self, X):
- Oqstate, O_bias, OA, OB, OS, dropout = get_lora_parameters(self.o_proj)
+ Oqstate, O_bias, OA, OB, OS, dropout = get_lora_parameters(self)
O = LoRA_W.apply(X, *unpack_gptqstate(Oqstate), O_bias, OA, OB, OS, dropout)
return O
diff --git a/scripts/benchmarks/accelerate.yaml b/scripts/benchmarks/accelerate.yaml
index f70d74fa..7923e624 100644
--- a/scripts/benchmarks/accelerate.yaml
+++ b/scripts/benchmarks/accelerate.yaml
@@ -14,9 +14,10 @@ fsdp_config:
fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
# this controls the FSDP pipelining
- fsdp_backward_prefetch_policy: BACKWARD_PRE # set to BACKWARD_PRE for the most time-efficient pipeline
+ fsdp_backward_prefetch: BACKWARD_PRE # set to BACKWARD_PRE for the most time-efficient pipeline
# but requires the most memory. BACKWARD_POST is the less
# memory intensive option
+  fsdp_backward_prefetch_policy: BACKWARD_PRE # kept for backward compatibility with accelerate<1.0
# setting this to true will increase forward memory by prefetching the next FSDP all-gather, while performing
# the current forward pass.
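For reference, the renamed `fsdp_backward_prefetch` key maps onto torch FSDP's `backward_prefetch` option. A minimal sketch of the underlying PyTorch API follows; it assumes an initialized distributed process group, and the module is a stand-in:

```python
# Sketch of the torch FSDP option behind the accelerate key above
# (requires torch.distributed to be initialized; `model` is a stand-in module).
import torch.nn as nn
from torch.distributed.fsdp import BackwardPrefetch
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP

model = nn.Linear(8, 8)
fsdp_model = FSDP(
    model,
    # BACKWARD_PRE prefetches the next all-gather before the current gradient
    # computation: fastest pipeline, highest memory. BACKWARD_POST prefetches
    # after it, trading some speed for lower peak memory.
    backward_prefetch=BackwardPrefetch.BACKWARD_PRE,
)
```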
diff --git a/scripts/benchmarks/compare_with_reference.py b/scripts/benchmarks/compare_with_reference.py
index 9a9c27d8..8ef420da 100644
--- a/scripts/benchmarks/compare_with_reference.py
+++ b/scripts/benchmarks/compare_with_reference.py
@@ -63,24 +63,30 @@ def compare_results(df, ref, plot_columns, threshold_ratio=0.1):
ref_series = ref[column].fillna(0)
df_series = df[column].fillna(0)
        # Extract outliers based on some threshold % difference on the reference
- ds = abs(df_series - ref_series) / (ref_series + 1e-9)
- outliers = ds.index[ds > threshold_ratio].to_list()
+ cmp = ref_series.to_frame()
+ cmp['metric'] = column
+ cmp = cmp.join(df_series.to_frame(), lsuffix='_ref')
+ cmp = cmp.rename(columns={f'{column}_ref': 'reference', column: 'new'})
+        cmp['ds'] = abs(cmp['reference'] - cmp['new']) / (cmp['reference'] + 1e-9)
+ outliers = cmp[cmp.ds > threshold_ratio]
+ outliers = outliers.drop('ds', axis=1)
+
plot_chart(
ax,
- ref_series,
- df_series,
+ cmp['reference'],
+ cmp['new'],
title=f"Metric: {column}",
xlabel="Reference",
ylabel="New",
)
charts.append((ax, f"compare-{column}.jpg"))
- total_outliers += [
- [column, *outlier, ref_series[outlier].item(), df_series[outlier].item()]
- for outlier in outliers
- ]
- outliers_df = pd.DataFrame(
- total_outliers, columns=["scenario", *df.index.names, "reference", "new"]
- )
+ total_outliers.append(outliers)
+
+ outliers_df = pd.concat(total_outliers)
return outliers_df, outliers, charts
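The reworked outlier extraction above reduces to this standalone pandas pattern, shown with hypothetical data; `metric_x` stands in for each plot column:

```python
# Standalone sketch of the join-and-threshold pattern above (hypothetical data).
import pandas as pd

ref = pd.Series([1.0, 2.0, 4.0], name="metric_x")   # reference run
new = pd.Series([1.05, 2.0, 5.0], name="metric_x")  # new run

cmp = ref.to_frame().join(new.to_frame(), lsuffix="_ref")
cmp = cmp.rename(columns={"metric_x_ref": "reference", "metric_x": "new"})
# relative difference against the reference, with an epsilon to avoid /0
cmp["ds"] = abs(cmp["reference"] - cmp["new"]) / (cmp["reference"] + 1e-9)
outliers = cmp[cmp["ds"] > 0.1].drop("ds", axis=1)
print(outliers)  # index 2: |5 - 4| / 4 = 0.25 exceeds the 10% threshold
```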
diff --git a/scripts/benchmarks/display_bench_results.py b/scripts/benchmarks/display_bench_results.py
index a9bbf800..815941b1 100644
--- a/scripts/benchmarks/display_bench_results.py
+++ b/scripts/benchmarks/display_bench_results.py
@@ -38,7 +38,12 @@ def main(
df[c] = constant[c]
kept += 1
- df = df.reset_index(drop=True).drop("output_dir", axis=1)
+ df = df.reset_index(drop=True)
+    # output_dir may be absent from some result sets
+    df = df.drop("output_dir", axis=1, errors="ignore")
+
df.reindex(sorted(df.columns), axis=1).to_csv(output_filename, index=False)
print("***************** Report Created ******************")
print(f"Total lines: '{len(df)}'")
diff --git a/scripts/benchmarks/refs/a100_80gb.csv b/scripts/benchmarks/refs/a100_80gb.csv
old mode 100644
new mode 100755
index abb7f2bc..1009d57f
--- a/scripts/benchmarks/refs/a100_80gb.csv
+++ b/scripts/benchmarks/refs/a100_80gb.csv
@@ -1,125 +1,125 @@
bf16,epoch,fp16,framework_config,learning_rate,lora_alpha,lora_dropout,mem_nvidia_mem_reserved,mem_peak_torch_mem_alloc_in_bytes,mem_torch_mem_alloc_in_bytes,model_name_or_path,num_gpus,peft_method,per_device_train_batch_size,r,target_modules,torch_dtype,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second
-True,0.07,,none,2e-5,,,15359.0,13632690688.0,6770300416.0,bigcode/gpt_bigcode-santacoder,1,,4,,,bfloat16,2.332193660736084,51.1308,7.823,1.956,16021.654
-True,0.07,,none,2e-5,,,16292.0,11310628864.0,9062559744.0,bigcode/gpt_bigcode-santacoder,2,,2,,,bfloat16,2.1947376251220705,34.4961,11.596,2.899,11873.81
-True,0.14,,none,2e-5,,,22507.0,20492466688.0,6769448448.0,bigcode/gpt_bigcode-santacoder,1,,8,,,bfloat16,2.3124921417236326,96.6986,8.273,1.034,16943.362
-True,0.14,,none,2e-5,,,19442.0,13862536704.0,9063688704.0,bigcode/gpt_bigcode-santacoder,2,,4,,,bfloat16,2.169607696533203,56.0038,14.285,1.786,14627.569
-True,0.07,,foak-fast-kernels,2e-5,,,14647.0,12021062144.0,6769251840.0,bigcode/gpt_bigcode-santacoder,1,,4,,,bfloat16,2.3321532440185546,51.9014,7.707,1.927,15783.76
-True,0.07,,foak-fast-kernels,2e-5,,,15159.0,11312634880.0,9064565760.0,bigcode/gpt_bigcode-santacoder,2,,2,,,bfloat16,2.1948485946655274,34.2526,11.678,2.919,11958.203
-True,0.14,,foak-fast-kernels,2e-5,,,19435.0,17273076224.0,6769448448.0,bigcode/gpt_bigcode-santacoder,1,,8,,,bfloat16,2.3125320434570313,95.1025,8.412,1.051,17227.735
-True,0.14,,foak-fast-kernels,2e-5,,,18982.0,12252922880.0,9064710144.0,bigcode/gpt_bigcode-santacoder,2,,4,,,bfloat16,2.1695573806762694,56.1474,14.248,1.781,14590.174
-True,0.15,,none,2e-5,,,76047.0,72434853376.0,43467892224.0,mistralai/Mistral-7B-v0.1,1,,4,,,bfloat16,0.8285089540481567,541.4379,0.739,0.185,3026.016
-True,0.15,,none,2e-5,,,77716.0,72434657280.0,57951176704.0,mistralai/Mistral-7B-v0.1,2,,2,,,bfloat16,0.8260897445678711,309.3386,1.293,0.323,2648.231
-True,0.29,,none,2e-5,,,71823.0,72435246592.0,43468285440.0,mistralai/Mistral-7B-v0.1,1,,8,,,bfloat16,0.8293021202087403,1053.8179,0.759,0.095,3109.456
-True,0.29,,none,2e-5,,,77628.0,72434853888.0,57951373312.0,mistralai/Mistral-7B-v0.1,2,,4,,,bfloat16,0.8233438396453857,565.1788,1.415,0.177,2898.906
-True,0.15,,foak-fast-kernels,2e-5,,,76071.0,72432723456.0,43466827264.0,mistralai/Mistral-7B-v0.1,1,,4,,,bfloat16,0.8281177949905395,483.8157,0.827,0.207,3386.414
-True,0.15,,foak-fast-kernels,2e-5,,,77736.0,72434657280.0,57951176704.0,mistralai/Mistral-7B-v0.1,2,,2,,,bfloat16,0.8248114776611328,279.656,1.43,0.358,2929.313
-True,0.29,,foak-fast-kernels,2e-5,,,70035.0,72433116672.0,43467220480.0,mistralai/Mistral-7B-v0.1,1,,8,,,bfloat16,0.8302128696441651,936.0343,0.855,0.107,3500.726
-True,0.29,,foak-fast-kernels,2e-5,,,80751.0,72434853888.0,57951373312.0,mistralai/Mistral-7B-v0.1,2,,4,,,bfloat16,0.8242907524108887,505.4347,1.583,0.198,3241.566
-True,,,none,2e-5,,,81193.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,4,,,bfloat16,,,,,
-True,,,none,2e-5,,,81090.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,2,,,bfloat16,,,,,
-True,,,none,2e-5,,,81193.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,8,,,bfloat16,,,,,
-True,,,none,2e-5,,,79873.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,4,,,bfloat16,,,,,
-True,,,foak-fast-kernels,2e-5,,,81193.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,4,,,bfloat16,,,,,
-True,,,foak-fast-kernels,2e-5,,,79873.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,2,,,bfloat16,,,,,
-True,,,foak-fast-kernels,2e-5,,,81193.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,8,,,bfloat16,,,,,
-True,,,foak-fast-kernels,2e-5,,,80448.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,4,,,bfloat16,,,,,
-True,,,none,2e-5,,,81177.0,,,NousResearch/Llama-2-70b-hf,1,,4,,,bfloat16,,,,,
-True,,,none,2e-5,,,80307.0,,,NousResearch/Llama-2-70b-hf,2,,2,,,bfloat16,,,,,
-True,,,none,2e-5,,,78361.0,,,NousResearch/Llama-2-70b-hf,1,,8,,,bfloat16,,,,,
-True,,,none,2e-5,,,80873.0,,,NousResearch/Llama-2-70b-hf,2,,4,,,bfloat16,,,,,
-True,,,foak-fast-kernels,2e-5,,,81177.0,,,NousResearch/Llama-2-70b-hf,1,,4,,,bfloat16,,,,,
-True,,,foak-fast-kernels,2e-5,,,80307.0,,,NousResearch/Llama-2-70b-hf,2,,2,,,bfloat16,,,,,
-True,,,foak-fast-kernels,2e-5,,,81177.0,,,NousResearch/Llama-2-70b-hf,1,,8,,,bfloat16,,,,,
-True,,,foak-fast-kernels,2e-5,,,80873.0,,,NousResearch/Llama-2-70b-hf,2,,4,,,bfloat16,,,,,
-True,0.15,,none,2e-4,16,0.1,28769.0,25681144320.0,14664508928.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8505570697784424,481.2995,0.831,0.208,3404.117
-True,0.15,,none,2e-4,16,0.1,17316.0,14975934464.0,7368046592.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8524067306518555,277.4993,1.441,0.36,2952.08
-True,0.29,,none,2e-4,16,0.1,42809.0,36670876160.0,14664902144.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,0.8525794410705566,953.5883,0.839,0.105,3436.284
-True,0.29,,none,2e-4,16,0.1,24995.0,21622071296.0,7368243200.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8535801601409913,503.0453,1.59,0.199,3256.963
-True,0.15,,foak-fast-kernels,2e-4,16,0.1,27511.0,23530188288.0,14664508928.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8501485443115234,422.1615,0.948,0.237,3880.979
-True,0.15,,foak-fast-kernels,2e-4,16,0.1,16963.0,14774607872.0,7368046592.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8515177631378174,253.1253,1.58,0.395,3236.341
-True,0.29,,foak-fast-kernels,2e-4,16,0.1,40271.0,32393276928.0,14664902144.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,0.8585422229766846,835.7668,0.957,0.12,3920.711
-True,0.29,,foak-fast-kernels,2e-4,16,0.1,23845.0,21219418112.0,7368243200.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8570475673675537,447.0688,1.789,0.224,3664.76
-True,,,none,2e-4,16,0.1,81127.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,0.15,,none,2e-4,16,0.1,61260.0,57922956288.0,47365978112.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.890601749420166,522.9286,0.765,0.191,1566.562
-True,,,none,2e-4,16,0.1,81127.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,0.29,,none,2e-4,16,0.1,69154.0,65045124608.0,47366174720.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8849094486236573,877.0711,0.912,0.114,1868.036
-True,,,foak-fast-kernels,2e-4,16,0.1,81127.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,0.15,,foak-fast-kernels,2e-4,16,0.1,61428.0,57688308736.0,47365978112.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8905545234680176,494.0377,0.81,0.202,1658.173
-True,,,foak-fast-kernels,2e-4,16,0.1,81127.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,0.29,,foak-fast-kernels,2e-4,16,0.1,68700.0,64576132608.0,47366174720.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8864504814147949,823.1006,0.972,0.121,1990.522
-True,,,none,2e-4,16,0.1,81205.0,,,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,,,none,2e-4,16,0.1,81003.0,,,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,,,none,2e-4,16,0.1,81205.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,,,none,2e-4,16,0.1,81085.0,,,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,,,foak-fast-kernels,2e-4,16,0.1,81205.0,,,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,,,foak-fast-kernels,2e-4,16,0.1,80875.0,,,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,,,foak-fast-kernels,2e-4,16,0.1,81205.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,,,foak-fast-kernels,2e-4,16,0.1,80993.0,,,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,0.15,,baseline-peft-bnb,2e-4,16,0.1,24727.0,20556796416.0,4307044864.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8730130004882812,575.2521,0.695,0.174,2848.143
-True,0.15,,baseline-peft-bnb,2e-4,16,0.1,11914.0,9525273600.0,2244541440.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8705758380889893,282.8263,1.414,0.354,2896.477
-True,0.29,,baseline-peft-bnb,2e-4,16,0.1,44721.0,36801860096.0,4307438080.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,0.8701838970184326,1116.3381,0.717,0.09,2935.311
-True,0.29,,baseline-peft-bnb,2e-4,16,0.1,19423.0,16171410432.0,2244738048.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8743645858764648,490.0888,1.632,0.204,3343.068
-True,0.15,,baseline-peft-bnb,2e-4,16,0.1,43775.0,43550715392.0,25201920512.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8938954734802246,1082.9658,0.369,0.092,1512.883
-True,0.15,,baseline-peft-bnb,2e-4,16,0.1,24068.0,21767946240.0,13273627648.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8936581707000733,521.8356,0.767,0.192,1569.843
-True,0.29,,baseline-peft-bnb,2e-4,16,0.1,63329.0,61500009472.0,25202313728.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,0.8923088932037353,1961.7179,0.408,0.051,1670.373
-True,0.29,,baseline-peft-bnb,2e-4,16,0.1,31356.0,28883934208.0,13273824256.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.889978551864624,879.0985,0.91,0.114,1863.727
-True,,,baseline-peft-bnb,2e-4,16,0.1,80247.0,,,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,0.14,,baseline-peft-bnb,2e-4,16,0.1,51569.0,46684804608.0,19266784768.0,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,1.0016851806640625,1892.8443,0.211,0.053,432.788
+,0.07,,none,2e-5,,,13947.0,11267745280.0,6770300416.0,bigcode/gpt_bigcode-santacoder,1,,4,,,bfloat16,2.3228125,46.9306,8.523,2.131,17455.544
+,0.07,,none,2e-5,,,9175.0,5713781760.0,4522987520.0,bigcode/gpt_bigcode-santacoder,2,,2,,,bfloat16,2.3228515625,30.5729,13.083,3.271,13397.488
+,0.14,,none,2e-5,,,16933.0,15193274880.0,6769448448.0,bigcode/gpt_bigcode-santacoder,1,,8,,,bfloat16,2.3215625,87.7376,9.118,1.14,18673.855
+,0.14,,none,2e-5,,,11612.0,7819433984.0,4522951680.0,bigcode/gpt_bigcode-santacoder,2,,4,,,bfloat16,2.321015625,50.335,15.894,1.987,16274.971
+,0.07,,foak-fast-kernels,2e-5,,,13951.0,11267745280.0,6770300416.0,bigcode/gpt_bigcode-santacoder,1,,4,,,bfloat16,2.3228135108947754,47.3585,8.446,2.112,17297.83
+,0.07,,foak-fast-kernels,2e-5,,,9180.0,5657082880.0,4521985024.0,bigcode/gpt_bigcode-santacoder,2,,2,,,bfloat16,2.3232896614074705,31.9424,12.523,3.131,12823.079
+,0.14,,foak-fast-kernels,2e-5,,,16937.0,13582956544.0,6769448448.0,bigcode/gpt_bigcode-santacoder,1,,8,,,bfloat16,2.3213717842102053,87.7202,9.12,1.14,18677.564
+,0.14,,foak-fast-kernels,2e-5,,,11240.0,7014914560.0,4523591680.0,bigcode/gpt_bigcode-santacoder,2,,4,,,bfloat16,2.3211612129211425,50.9859,15.691,1.961,16067.19
+,0.15,,none,2e-5,,,76047.0,72434853376.0,43467892224.0,mistralai/Mistral-7B-v0.1,1,,4,,,bfloat16,0.8395502376556396,536.4621,0.746,0.186,3054.083
+,0.15,,none,2e-5,,,43214.0,36225955840.0,28984215552.0,mistralai/Mistral-7B-v0.1,2,,2,,,bfloat16,0.8395203018188476,295.4597,1.354,0.338,2772.629
+,0.29,,none,2e-5,,,71823.0,72435246592.0,43468285440.0,mistralai/Mistral-7B-v0.1,1,,8,,,bfloat16,0.8336902141571045,1043.037,0.767,0.096,3141.595
+,0.29,,none,2e-5,,,52384.0,36226152448.0,28984412160.0,mistralai/Mistral-7B-v0.1,2,,4,,,bfloat16,0.8332884168624878,545.5655,1.466,0.183,3003.122
+,0.15,,foak-fast-kernels,2e-5,,,76071.0,72432723456.0,43466827264.0,mistralai/Mistral-7B-v0.1,1,,4,,,bfloat16,0.8396030998229981,477.3215,0.838,0.21,3432.487
+,0.15,,foak-fast-kernels,2e-5,,,42923.0,36225955840.0,28984215552.0,mistralai/Mistral-7B-v0.1,2,,2,,,bfloat16,0.8396362972259521,265.5943,1.506,0.377,3084.404
+,0.29,,foak-fast-kernels,2e-5,,,70035.0,72433116672.0,43467220480.0,mistralai/Mistral-7B-v0.1,1,,8,,,bfloat16,0.8332135772705078,924.9925,0.865,0.108,3542.515
+,0.29,,foak-fast-kernels,2e-5,,,51883.0,36226152448.0,28984412160.0,mistralai/Mistral-7B-v0.1,2,,4,,,bfloat16,0.8333692359924316,488.0919,1.639,0.205,3356.745
+,,,none,2e-5,,,81193.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,4,,,bfloat16,,,,,
+,,,none,2e-5,,,79397.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,2,,,bfloat16,,,,,
+,,,none,2e-5,,,81193.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,8,,,bfloat16,,,,,
+,,,none,2e-5,,,80676.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,4,,,bfloat16,,,,,
+,,,foak-fast-kernels,2e-5,,,81193.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,4,,,bfloat16,,,,,
+,,,foak-fast-kernels,2e-5,,,80696.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,2,,,bfloat16,,,,,
+,,,foak-fast-kernels,2e-5,,,81193.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,8,,,bfloat16,,,,,
+,,,foak-fast-kernels,2e-5,,,80688.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,4,,,bfloat16,,,,,
+,,,none,2e-5,,,81177.0,,,NousResearch/Llama-2-70b-hf,1,,4,,,bfloat16,,,,,
+,,,none,2e-5,,,81089.0,,,NousResearch/Llama-2-70b-hf,2,,2,,,bfloat16,,,,,
+,,,none,2e-5,,,76729.0,,,NousResearch/Llama-2-70b-hf,1,,8,,,bfloat16,,,,,
+,,,none,2e-5,,,80732.0,,,NousResearch/Llama-2-70b-hf,2,,4,,,bfloat16,,,,,
+,,,foak-fast-kernels,2e-5,,,78361.0,,,NousResearch/Llama-2-70b-hf,1,,4,,,bfloat16,,,,,
+,,,foak-fast-kernels,2e-5,,,81005.0,,,NousResearch/Llama-2-70b-hf,2,,2,,,bfloat16,,,,,
+,,,foak-fast-kernels,2e-5,,,81177.0,,,NousResearch/Llama-2-70b-hf,1,,8,,,bfloat16,,,,,
+,,,foak-fast-kernels,2e-5,,,80973.0,,,NousResearch/Llama-2-70b-hf,2,,4,,,bfloat16,,,,,
+,0.15,,none,2e-4,16,0.1,28065.0,25653881344.0,14664508928.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8610371589660645,488.6935,0.819,0.205,3352.613
+,0.15,,none,2e-4,16,0.1,17741.0,15245549568.0,7368046592.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.861702070236206,277.7338,1.44,0.36,2949.587
+,0.29,,none,2e-4,16,0.1,41401.0,36643613184.0,14664902144.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,0.8568434524536133,967.4845,0.827,0.103,3386.928
+,0.29,,none,2e-4,16,0.1,25343.0,22161170432.0,7368243200.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8567116451263428,512.6501,1.561,0.195,3195.942
+,0.15,,foak-fast-kernels,2e-4,16,0.1,27895.0,24068304384.0,14664508928.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8614468479156494,429.9031,0.93,0.233,3811.091
+,0.15,,foak-fast-kernels,2e-4,16,0.1,17042.0,15044222976.0,7368046592.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8614903545379639,248.7361,1.608,0.402,3293.45
+,0.29,,foak-fast-kernels,2e-4,16,0.1,41039.0,33470361088.0,14664902144.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,0.8572147083282471,849.9151,0.941,0.118,3855.444
+,0.29,,foak-fast-kernels,2e-4,16,0.1,24265.0,21758517248.0,7368243200.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8567188644409179,455.0611,1.758,0.22,3600.395
+,,,none,2e-4,16,0.1,81127.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+,0.15,,none,2e-4,16,0.1,61924.0,58193823232.0,47365978112.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.9005669975280761,520.7225,0.768,0.192,1573.199
+,,,none,2e-4,16,0.1,81015.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+,0.29,,none,2e-4,16,0.1,69602.0,65584260096.0,47366174720.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8976198959350586,885.1412,0.904,0.113,1851.004
+,,,foak-fast-kernels,2e-4,16,0.1,81127.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+,0.15,,foak-fast-kernels,2e-4,16,0.1,61592.0,57960430080.0,47365978112.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8999550151824951,493.9226,0.81,0.202,1658.559
+,,,foak-fast-kernels,2e-4,16,0.1,81127.0,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+,0.29,,foak-fast-kernels,2e-4,16,0.1,69048.0,65130968576.0,47366174720.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8960096549987793,831.6102,0.962,0.12,1970.154
+,,,none,2e-4,16,0.1,81205.0,,,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+,,,none,2e-4,16,0.1,81110.0,,,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+,,,none,2e-4,16,0.1,81205.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+,,,none,2e-4,16,0.1,80991.0,,,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+,,,foak-fast-kernels,2e-4,16,0.1,81205.0,,,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+,,,foak-fast-kernels,2e-4,16,0.1,81097.0,,,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+,,,foak-fast-kernels,2e-4,16,0.1,80147.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+,,,foak-fast-kernels,2e-4,16,0.1,80991.0,,,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+True,0.15,,baseline-peft-bnb,2e-4,16,0.1,24727.0,20556796416.0,4307044864.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8762012290954589,577.341,0.693,0.173,2837.838
+True,0.15,,baseline-peft-bnb,2e-4,16,0.1,11963.0,9525273600.0,2244541440.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.8764579200744629,284.0353,1.408,0.352,2884.148
+True,0.29,,baseline-peft-bnb,2e-4,16,0.1,44721.0,36801860096.0,4307438080.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,0.8748912715911865,1119.4029,0.715,0.089,2927.275
+True,0.29,,baseline-peft-bnb,2e-4,16,0.1,19429.0,16171410432.0,2244738048.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8751678466796875,489.398,1.635,0.204,3347.787
+True,0.15,,baseline-peft-bnb,2e-4,16,0.1,44079.0,43523053568.0,25201920512.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.9031138801574707,1086.6887,0.368,0.092,1507.699
+True,0.15,,baseline-peft-bnb,2e-4,16,0.1,24088.0,21763944448.0,13273627648.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,0.9032989406585693,521.0491,0.768,0.192,1572.213
+True,0.29,,baseline-peft-bnb,2e-4,16,0.1,62685.0,61477128704.0,25202313728.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,0.8976852321624755,1968.9722,0.406,0.051,1664.219
+True,0.29,,baseline-peft-bnb,2e-4,16,0.1,31061.0,28880510976.0,13273824256.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,0.8985180091857911,883.388,0.906,0.113,1854.678
+True,,,baseline-peft-bnb,2e-4,16,0.1,79737.0,,,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
+True,0.14,,baseline-peft-bnb,2e-4,16,0.1,51545.0,46685328896.0,19266784768.0,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj,bfloat16,1.0048488330841066,1896.3914,0.211,0.053,431.978
True,,,baseline-peft-bnb,2e-4,16,0.1,79933.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj,bfloat16,,,,,
-True,0.28,,baseline-peft-bnb,2e-4,16,0.1,80853.0,72625788416.0,19266981376.0,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,1.0005127334594726,3608.5763,0.222,0.028,454.029
-True,0.07,,accelerated-peft-bnb,2e-4,16,0.1,11429.0,9148997120.0,810277376.0,bigcode/gpt_bigcode-santacoder,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4391163635253905,54.3258,7.363,1.841,15079.404
-True,0.07,,accelerated-peft-bnb,2e-4,16,0.1,7308.0,4788195328.0,411216896.0,bigcode/gpt_bigcode-santacoder,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.43807243347168,51.0965,7.828,1.957,8016.205
-True,0.14,,accelerated-peft-bnb,2e-4,16,0.1,21921.0,17486716416.0,810473984.0,bigcode/gpt_bigcode-santacoder,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4325622367858886,101.7155,7.865,0.983,16107.672
-True,0.14,,accelerated-peft-bnb,2e-4,16,0.1,12455.0,8957644800.0,411315200.0,bigcode/gpt_bigcode-santacoder,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.433025417327881,56.9194,14.055,1.757,14392.278
-True,0.07,,accelerated-peft-bnb-foak,2e-4,16,0.1,9125.0,7538417152.0,810277376.0,bigcode/gpt_bigcode-santacoder,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4385263442993166,54.1754,7.383,1.846,15121.253
-True,0.07,,accelerated-peft-bnb-foak,2e-4,16,0.1,6102.0,3989590016.0,411216896.0,bigcode/gpt_bigcode-santacoder,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4416963958740237,32.8776,12.166,3.042,12458.335
-True,0.14,,accelerated-peft-bnb-foak,2e-4,16,0.1,17313.0,14266736128.0,810473984.0,bigcode/gpt_bigcode-santacoder,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.432781238555908,100.6833,7.946,0.993,16272.811
-True,0.14,,accelerated-peft-bnb-foak,2e-4,16,0.1,10171.0,7353749504.0,411315200.0,bigcode/gpt_bigcode-santacoder,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4364870262145994,55.5047,14.413,1.802,14759.122
-True,0.15,,accelerated-peft-bnb,2e-4,16,0.1,18263.0,15323147776.0,4306512384.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8739612770080566,461.5658,0.867,0.217,3549.656
-True,0.15,,accelerated-peft-bnb,2e-4,16,0.1,11981.0,9525273600.0,2244541440.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8697105979919434,282.2485,1.417,0.354,2902.407
-True,0.29,,accelerated-peft-bnb,2e-4,16,0.1,32687.0,26312879616.0,4306905600.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8721167373657227,905.4543,0.884,0.11,3618.957
-True,0.29,,accelerated-peft-bnb,2e-4,16,0.1,19379.0,16171410432.0,2244738048.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8676586151123047,490.4414,1.631,0.204,3340.664
-True,0.15,,accelerated-peft-bnb-foak,2e-4,16,0.1,18809.0,13064809472.0,4306512384.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8674864864349365,397.3926,1.007,0.252,4122.875
-True,0.15,,accelerated-peft-bnb-foak,2e-4,16,0.1,11734.0,9309332480.0,2244541440.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8727792549133301,216.4955,1.848,0.462,3783.912
-True,0.29,,accelerated-peft-bnb-foak,2e-4,16,0.1,31953.0,21823466496.0,4306905600.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8684949207305909,776.5844,1.03,0.129,4219.503
-True,0.29,,accelerated-peft-bnb-foak,2e-4,16,0.1,18598.0,15685985280.0,2244738048.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8712387371063233,404.6605,1.977,0.247,4048.826
-True,0.15,,accelerated-peft-bnb,2e-4,16,0.1,37347.0,36218023424.0,25201388032.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8953837585449219,839.4246,0.477,0.119,1951.813
-True,0.15,,accelerated-peft-bnb,2e-4,16,0.1,23942.0,21767827968.0,13273627648.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8941266441345215,518.8796,0.771,0.193,1578.786
-True,0.29,,accelerated-peft-bnb,2e-4,16,0.1,49889.0,47207755264.0,25201781248.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8910543060302735,1567.3902,0.51,0.064,2090.609
-True,0.29,,accelerated-peft-bnb,2e-4,16,0.1,31310.0,28881018368.0,13273824256.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.891448860168457,876.3875,0.913,0.114,1869.493
-True,0.15,,accelerated-peft-bnb-foak,2e-4,16,0.1,37423.0,34870765056.0,25201388032.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8943702983856201,774.2084,0.517,0.129,2116.226
-True,0.15,,accelerated-peft-bnb-foak,2e-4,16,0.1,23972.0,21485080576.0,13273627648.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.896587963104248,456.2499,0.877,0.219,1795.507
-True,0.29,,accelerated-peft-bnb-foak,2e-4,16,0.1,49907.0,44414669824.0,25201781248.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8900082683563233,1436.5433,0.557,0.07,2281.031
-True,0.29,,accelerated-peft-bnb-foak,2e-4,16,0.1,30617.0,28262693888.0,13273824256.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.892123146057129,789.7475,1.013,0.127,2074.587
-True,0.14,,accelerated-peft-bnb,2e-4,16,0.1,71641.0,68126422016.0,37179042816.0,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.0015915203094483,3497.702,0.114,0.029,468.422
-True,0.14,,accelerated-peft-bnb,2e-4,16,0.1,51531.0,46684804608.0,19266784768.0,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.0012191390991212,1893.9698,0.211,0.053,432.531
-True,,,accelerated-peft-bnb,2e-4,16,0.1,81009.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,,,,,
-True,0.28,,accelerated-peft-bnb,2e-4,16,0.1,80647.0,72625788416.0,19266981376.0,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.999879560470581,3609.2665,0.222,0.028,453.943
-True,0.14,,accelerated-peft-bnb-foak,2e-4,16,0.1,71067.0,67048944640.0,37179042816.0,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.0010389518737792,3195.764,0.125,0.031,512.679
-True,0.14,,accelerated-peft-bnb-foak,2e-4,16,0.1,51369.0,46407128576.0,19266784768.0,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.002256908416748,1682.2067,0.238,0.059,486.979
-True,,,accelerated-peft-bnb-foak,2e-4,16,0.1,80783.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,,,,,
-True,0.28,,accelerated-peft-bnb-foak,2e-4,16,0.1,80919.0,71810192896.0,19266981376.0,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.9998698234558105,3242.0337,0.247,0.031,505.362
-,0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,18785.0,15353458176.0,4336822784.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9844318866729737,481.3534,0.831,0.208,3403.736
-,0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,12310.0,9542804992.0,2261220352.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9955140018463134,287.5048,1.391,0.348,2849.344
-,0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,32579.0,26343190016.0,4337216000.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.9898070430755616,946.6898,0.845,0.106,3461.324
-,0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,19842.0,16188941824.0,2261416960.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9835797500610352,504.1388,1.587,0.198,3249.898
-,0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,18553.0,13095119872.0,4336822784.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9779060745239258,412.9784,0.969,0.242,3967.278
-,0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,12109.0,9326863872.0,2261220352.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,1.0228476333618164,221.6896,1.804,0.451,3695.257
-,0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,32337.0,21853776896.0,4337216000.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.9703095436096192,810.0302,0.988,0.123,4045.281
-,0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,19074.0,15703516672.0,2261416960.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,1.0047074699401854,418.3267,1.912,0.239,3916.556
-,0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,36435.0,35528093184.0,24511457792.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9047156238555908,832.581,0.48,0.12,1967.857
-,0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,23573.0,21067999744.0,12581256192.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9067060089111328,498.7756,0.802,0.2,1642.422
-,0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,49007.0,46517825024.0,24511851008.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.9023971652984619,1584.7821,0.505,0.063,2067.666
-,0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,30557.0,28182132736.0,12581452800.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9020897960662841,869.9509,0.92,0.115,1883.325
-,0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,36947.0,34185567744.0,24511457792.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9064445781707764,762.6001,0.525,0.131,2148.439
-,0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,23659.0,20783364608.0,12581256192.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9081688308715821,433.9353,0.922,0.23,1887.839
-,0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,50659.0,43785179648.0,24511851008.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.9031758785247803,1447.8847,0.553,0.069,2263.164
-,0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,30421.0,27563599360.0,12581452800.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9036233234405517,779.5952,1.026,0.128,2101.603
-,0.14,True,accelerated-peft-autogptq,2e-4,16,0.1,70525.0,67069752832.0,36122373120.0,TheBloke/Llama-2-70B-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9943218612670899,3572.5902,0.112,0.028,458.603
-,0.14,True,accelerated-peft-autogptq,2e-4,16,0.1,50590.0,45638032384.0,18219970048.0,TheBloke/Llama-2-70B-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9938594245910645,1914.0025,0.209,0.052,428.004
+True,0.28,,baseline-peft-bnb,2e-4,16,0.1,81217.0,72625788416.0,19266981376.0,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj,bfloat16,1.0077042770385742,3623.214,0.221,0.028,452.195
+True,0.07,,accelerated-peft-bnb,2e-4,16,0.1,11429.0,9148997120.0,810277376.0,bigcode/gpt_bigcode-santacoder,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4330611038208008,54.1881,7.382,1.845,15117.718
+True,0.07,,accelerated-peft-bnb,2e-4,16,0.1,7286.0,4788195328.0,411216896.0,bigcode/gpt_bigcode-santacoder,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4328096199035643,47.1007,8.492,2.123,8696.257
+True,0.14,,accelerated-peft-bnb,2e-4,16,0.1,21921.0,17486716416.0,810473984.0,bigcode/gpt_bigcode-santacoder,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4441967582702637,101.1803,7.907,0.988,16192.874
+True,0.14,,accelerated-peft-bnb,2e-4,16,0.1,12373.0,8957644800.0,411315200.0,bigcode/gpt_bigcode-santacoder,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.443736057281494,56.188,14.238,1.78,14579.636
+True,0.07,,accelerated-peft-bnb-foak,2e-4,16,0.1,9125.0,7538417152.0,810277376.0,bigcode/gpt_bigcode-santacoder,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4337510871887207,53.7762,7.438,1.86,15233.491
+True,0.07,,accelerated-peft-bnb-foak,2e-4,16,0.1,6152.0,3989590016.0,411216896.0,bigcode/gpt_bigcode-santacoder,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4375525856018068,33.3504,11.994,2.998,12281.723
+True,0.14,,accelerated-peft-bnb-foak,2e-4,16,0.1,17313.0,14266736128.0,810473984.0,bigcode/gpt_bigcode-santacoder,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4446635818481446,100.4005,7.968,0.996,16318.65
+True,0.14,,accelerated-peft-bnb-foak,2e-4,16,0.1,10159.0,7354667008.0,412232704.0,bigcode/gpt_bigcode-santacoder,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,2.4469522857666015,55.2559,14.478,1.81,14825.56
+True,0.15,,accelerated-peft-bnb,2e-4,16,0.1,18263.0,15323147776.0,4306512384.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8766812419891358,458.3363,0.873,0.218,3574.668
+True,0.15,,accelerated-peft-bnb,2e-4,16,0.1,11954.0,9525273600.0,2244541440.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.876628999710083,280.7414,1.425,0.356,2917.988
+True,0.29,,accelerated-peft-bnb,2e-4,16,0.1,32687.0,26312879616.0,4306905600.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8740875625610351,897.5335,0.891,0.111,3650.895
+True,0.29,,accelerated-peft-bnb,2e-4,16,0.1,19434.0,16171410432.0,2244738048.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8732735443115235,486.3289,1.645,0.206,3368.913
+True,0.15,,accelerated-peft-bnb-foak,2e-4,16,0.1,18809.0,13064809472.0,4306512384.0,mistralai/Mistral-7B-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8768407535552979,393.8211,1.016,0.254,4160.265
+True,0.15,,accelerated-peft-bnb-foak,2e-4,16,0.1,11743.0,9309332480.0,2244541440.0,mistralai/Mistral-7B-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.879526081085205,214.3097,1.866,0.467,3822.505
+True,0.29,,accelerated-peft-bnb-foak,2e-4,16,0.1,31953.0,21823466496.0,4306905600.0,mistralai/Mistral-7B-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8728581523895264,767.8082,1.042,0.13,4267.733
+True,0.29,,accelerated-peft-bnb-foak,2e-4,16,0.1,18668.0,15685985280.0,2244738048.0,mistralai/Mistral-7B-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.8741103363037109,402.1934,1.989,0.249,4073.662
+True,0.15,,accelerated-peft-bnb,2e-4,16,0.1,37363.0,36218023424.0,25201388032.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.9028711032867431,834.4269,0.479,0.12,1963.503
+True,0.15,,accelerated-peft-bnb,2e-4,16,0.1,24011.0,21766241792.0,13273627648.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.9032711505889892,520.5815,0.768,0.192,1573.625
+True,0.29,,accelerated-peft-bnb,2e-4,16,0.1,49945.0,47207755264.0,25201781248.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.9003755378723145,1554.493,0.515,0.064,2107.954
+True,0.29,,accelerated-peft-bnb,2e-4,16,0.1,31205.0,28888264192.0,13273824256.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.89886399269104,874.6595,0.915,0.114,1873.186
+True,0.15,,accelerated-peft-bnb-foak,2e-4,16,0.1,37475.0,34863740928.0,25201388032.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.9039991569519042,768.7027,0.52,0.13,2131.383
+True,0.15,,accelerated-peft-bnb-foak,2e-4,16,0.1,23991.0,21482332160.0,13273627648.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.9055265331268311,455.3487,0.878,0.22,1799.061
+True,0.29,,accelerated-peft-bnb-foak,2e-4,16,0.1,50207.0,44408959488.0,25201781248.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.900047550201416,1424.4872,0.562,0.07,2300.337
+True,0.29,,accelerated-peft-bnb-foak,2e-4,16,0.1,30477.0,28267988992.0,13273824256.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,0.9013978958129882,786.3614,1.017,0.127,2083.52
+True,0.14,,accelerated-peft-bnb,2e-4,16,0.1,71641.0,68126422016.0,37179042816.0,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.007793321609497,3457.3761,0.116,0.029,473.885
+True,0.14,,accelerated-peft-bnb,2e-4,16,0.1,51545.0,46685328896.0,19266784768.0,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.0082451915740966,1874.0719,0.213,0.053,437.123
+True,,,accelerated-peft-bnb,2e-4,16,0.1,81013.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,,,,,
+True,0.28,,accelerated-peft-bnb,2e-4,16,0.1,81155.0,72625788416.0,19266981376.0,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.0077899551391603,3583.0892,0.223,0.028,457.259
+True,0.14,,accelerated-peft-bnb-foak,2e-4,16,0.1,71067.0,67048944640.0,37179042816.0,NousResearch/Llama-2-70b-hf,1,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.0081148910522462,3150.1484,0.127,0.032,520.102
+True,0.14,,accelerated-peft-bnb-foak,2e-4,16,0.1,51384.0,46407652864.0,19266784768.0,NousResearch/Llama-2-70b-hf,2,lora,2,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.0077662754058838,1669.549,0.24,0.06,490.671
+True,,,accelerated-peft-bnb-foak,2e-4,16,0.1,80785.0,,,NousResearch/Llama-2-70b-hf,1,lora,8,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,,,,,
+True,0.28,,accelerated-peft-bnb-foak,2e-4,16,0.1,81179.0,71810192896.0,19266981376.0,NousResearch/Llama-2-70b-hf,2,lora,4,16,q_proj k_proj v_proj o_proj c_attn,bfloat16,1.00919659614563,3214.671,0.249,0.031,509.663
+,0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,18785.0,15353458176.0,4336822784.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,1.0203185558319092,473.9743,0.844,0.211,3456.727
+,0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,12264.0,9542804992.0,2261220352.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,1.0454671478271484,285.4078,1.402,0.35,2870.279
+,0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,32579.0,26343190016.0,4337216000.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,1.0013581275939942,936.5643,0.854,0.107,3498.745
+,0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,19843.0,16188941824.0,2261416960.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,1.0203004932403565,500.8119,1.597,0.2,3271.488
+,0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,18553.0,13095119872.0,4336822784.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,1.0269924259185792,405.7291,0.986,0.246,4038.162
+,0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,12099.0,9326863872.0,2261220352.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,1.0549799442291259,220.3857,1.815,0.454,3717.119
+,0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,32337.0,21853776896.0,4337216000.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,1.0090702056884766,799.4622,1.001,0.125,4098.755
+,0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,19033.0,15703516672.0,2261416960.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,1.0229192638397218,415.4519,1.926,0.241,3943.657
+,0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,36437.0,35528093184.0,24511457792.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9015938568115235,823.3572,0.486,0.121,1989.902
+,0.15,True,accelerated-peft-autogptq,2e-4,16,0.1,23495.0,21066623488.0,12581256192.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9031338882446289,496.1844,0.806,0.202,1650.999
+,0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,48911.0,46517825024.0,24511851008.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.8999896621704102,1570.5214,0.509,0.064,2086.441
+,0.29,True,accelerated-peft-autogptq,2e-4,16,0.1,30507.0,28181665792.0,12581452800.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.901834602355957,865.0845,0.925,0.116,1893.919
+,0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,36751.0,34179364864.0,24511457792.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9031289768218994,755.1622,0.53,0.132,2169.6
+,0.15,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,23763.0,20783753728.0,12581256192.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9057476425170898,430.5808,0.929,0.232,1902.546
+,0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,49931.0,43752453120.0,24511851008.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,0.9008800506591796,1431.8707,0.559,0.07,2288.475
+,0.29,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,30254.0,27564300288.0,12581452800.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9021099472045898,775.2843,1.032,0.129,2113.289
+,0.14,True,accelerated-peft-autogptq,2e-4,16,0.1,70525.0,67069752832.0,36122373120.0,TheBloke/Llama-2-70B-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9996613597869873,3526.9139,0.113,0.028,464.542
+,0.14,True,accelerated-peft-autogptq,2e-4,16,0.1,50441.0,45638032384.0,18219970048.0,TheBloke/Llama-2-70B-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9989643859863281,1896.7525,0.211,0.053,431.896
,,True,accelerated-peft-autogptq,2e-4,16,0.1,79895.0,,,TheBloke/Llama-2-70B-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,,,,,
-,0.28,True,accelerated-peft-autogptq,2e-4,16,0.1,80748.0,71579016192.0,18220166656.0,TheBloke/Llama-2-70B-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9892638874053955,3677.8684,0.218,0.027,445.475
-,0.14,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,70443.0,65992275456.0,36122373120.0,TheBloke/Llama-2-70B-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9935803413391113,3250.1642,0.123,0.031,504.098
-,0.14,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,50948.0,45360356352.0,18219970048.0,TheBloke/Llama-2-70B-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9940973091125488,1681.9931,0.238,0.059,487.041
+,0.28,True,accelerated-peft-autogptq,2e-4,16,0.1,80628.0,71579016192.0,18220166656.0,TheBloke/Llama-2-70B-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9983775997161866,3672.3911,0.218,0.027,446.14
+,0.14,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,70443.0,65992275456.0,36122373120.0,TheBloke/Llama-2-70B-GPTQ,1,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9990997219085693,3204.0568,0.125,0.031,511.352
+,0.14,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,51069.0,45360356352.0,18219970048.0,TheBloke/Llama-2-70B-GPTQ,2,lora,2,16,q_proj k_proj v_proj o_proj,float16,0.9999132919311523,1675.1738,0.239,0.06,489.024
,,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,81077.0,,,TheBloke/Llama-2-70B-GPTQ,1,lora,8,16,q_proj k_proj v_proj o_proj,float16,,,,,
-,0.28,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,80617.0,70763420672.0,18220166656.0,TheBloke/Llama-2-70B-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9896932983398438,3295.444,0.243,0.03,497.171
+,0.28,True,accelerated-peft-autogptq-foak,2e-4,16,0.1,80592.0,70763420672.0,18220166656.0,TheBloke/Llama-2-70B-GPTQ,2,lora,4,16,q_proj k_proj v_proj o_proj,float16,0.9987747859954834,3294.6291,0.243,0.03,497.294
diff --git a/scripts/benchmarks/refs/requirements.txt b/scripts/benchmarks/refs/requirements.txt
index ad534377..3d427276 100644
--- a/scripts/benchmarks/refs/requirements.txt
+++ b/scripts/benchmarks/refs/requirements.txt
@@ -1,41 +1,41 @@
-accelerate==0.33.0
-aiohappyeyeballs==2.4.0
-aiohttp==3.10.5
+accelerate==0.34.2
+aiohappyeyeballs==2.4.3
+aiohttp==3.10.10
aiosignal==1.3.1
async-timeout==4.0.3
attrs==24.2.0
bitsandbytes==0.43.3
certifi==2024.8.30
-charset-normalizer==3.3.2
+charset-normalizer==3.4.0
contourpy==1.3.0
cycler==0.12.1
datasets==2.21.0
dill==0.3.8
docstring_parser==0.16
einops==0.8.0
-filelock==3.16.0
+filelock==3.16.1
flash-attn==2.6.3
--e git+https://github.com/foundation-model-stack/fms-acceleration.git@4851bf363014216e6d938c776b8af3103aca5082#egg=fms_acceleration&subdirectory=plugins/framework
--e git+https://github.com/foundation-model-stack/fms-acceleration.git@4851bf363014216e6d938c776b8af3103aca5082#egg=fms_acceleration_aadp&subdirectory=plugins/attention-and-distributed-packing
--e git+https://github.com/foundation-model-stack/fms-acceleration.git@4851bf363014216e6d938c776b8af3103aca5082#egg=fms_acceleration_foak&subdirectory=plugins/fused-ops-and-kernels
--e git+https://github.com/foundation-model-stack/fms-acceleration.git@4851bf363014216e6d938c776b8af3103aca5082#egg=fms_acceleration_peft&subdirectory=plugins/accelerated-peft
-fms-hf-tuning @ git+https://github.com/foundation-model-stack/fms-hf-tuning.git@c40ae7f1615b95b2d0c5f02206d1a3799b0f615c
-fonttools==4.53.1
+-e git+https://github.com/foundation-model-stack/fms-acceleration.git@8178cd5576957e979997aa574078ab5155aa6f20#egg=fms_acceleration&subdirectory=plugins/framework
+-e git+https://github.com/foundation-model-stack/fms-acceleration.git@8178cd5576957e979997aa574078ab5155aa6f20#egg=fms_acceleration_aadp&subdirectory=plugins/attention-and-distributed-packing
+-e git+https://github.com/foundation-model-stack/fms-acceleration.git@8178cd5576957e979997aa574078ab5155aa6f20#egg=fms_acceleration_foak&subdirectory=plugins/fused-ops-and-kernels
+-e git+https://github.com/foundation-model-stack/fms-acceleration.git@8178cd5576957e979997aa574078ab5155aa6f20#egg=fms_acceleration_peft&subdirectory=plugins/accelerated-peft
+fms-hf-tuning @ git+https://github.com/foundation-model-stack/fms-hf-tuning.git@d36020230b3e4c743f61848d3e37ef163fae2dfd
+fonttools==4.54.1
frozenlist==1.4.1
fsspec==2024.6.1
-huggingface-hub==0.24.7
-idna==3.8
+huggingface-hub==0.25.2
+idna==3.10
Jinja2==3.1.4
kiwisolver==1.4.7
llvmlite==0.43.0
markdown-it-py==3.0.0
-MarkupSafe==2.1.5
+MarkupSafe==3.0.1
matplotlib==3.9.2
mdurl==0.1.2
mpmath==1.3.0
multidict==6.1.0
multiprocess==0.70.16
-networkx==3.3
+networkx==3.4.1
numba==0.60.0
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
@@ -48,13 +48,14 @@ nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.20.5
-nvidia-nvjitlink-cu12==12.6.68
+nvidia-nvjitlink-cu12==12.6.77
nvidia-nvtx-cu12==12.1.105
packaging==24.1
-pandas==2.2.2
-peft==0.12.0
+pandas==2.2.3
+peft==0.13.2
pillow==10.4.0
-protobuf==5.28.1
+propcache==0.2.0
+protobuf==5.28.2
psutil==6.0.0
pyarrow==17.0.0
Pygments==2.18.0
@@ -64,23 +65,23 @@ pytz==2024.2
PyYAML==6.0.2
regex==2024.9.11
requests==2.32.3
-rich==13.8.1
+rich==13.9.2
safetensors==0.4.5
sentencepiece==0.2.0
shtab==1.7.1
simpleeval==0.9.13
six==1.16.0
-sympy==1.13.2
+sympy==1.13.3
threadpoolctl==3.5.0
-tokenizers==0.19.1
+tokenizers==0.20.1
torch==2.4.1
tqdm==4.66.5
-transformers==4.44.2
+transformers==4.45.2
triton==3.0.0
-trl==0.10.1
+trl==0.11.3
typing_extensions==4.12.2
-tyro==0.8.10
-tzdata==2024.1
+tyro==0.8.12
+tzdata==2024.2
urllib3==2.2.3
xxhash==3.5.0
-yarl==1.11.1
+yarl==1.15.0
diff --git a/scripts/benchmarks/scenarios.yaml b/scripts/benchmarks/scenarios.yaml
index 564bf9e5..976912ab 100644
--- a/scripts/benchmarks/scenarios.yaml
+++ b/scripts/benchmarks/scenarios.yaml
@@ -48,14 +48,12 @@ scenarios:
- 'mistralai/Mixtral-8x7B-Instruct-v0.1'
- 'NousResearch/Llama-2-70b-hf'
torch_dtype: bfloat16
- bf16: True
- name: standard-peft
framework_config:
-
- foak-fast-kernels
arguments:
- bf16: True
learning_rate: 2e-4
torch_dtype: bfloat16
peft_method: lora