From a6ee0754ad96e8849b108197a0ff02dd1db0d8ec Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Thu, 9 Jan 2025 12:50:15 +0000
Subject: [PATCH] pushing to runpod

---
 examples/qwen2/prm.yaml                           | 16 ++++++++--------
 src/axolotl/core/trainer_builder.py               |  5 ++++-
 .../prompt_strategies/stepwise_supervised.py      | 17 +++++++++--------
 src/axolotl/utils/data/sft.py                     |  2 +-
 4 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/examples/qwen2/prm.yaml b/examples/qwen2/prm.yaml
index 3b34f5f0e..7ff0f1663 100644
--- a/examples/qwen2/prm.yaml
+++ b/examples/qwen2/prm.yaml
@@ -31,18 +31,18 @@ wandb_name:
 wandb_log_model:
 
-gradient_accumulation_steps: 4
-micro_batch_size: 2
-num_epochs: 4
+gradient_accumulation_steps: 1
+micro_batch_size: 8
+num_epochs: 1
 optimizer: adamw_torch
 lr_scheduler: cosine
 learning_rate: 0.0002
 
 train_on_inputs: false
 group_by_length: false
 
-float32: true
-fp16: false
-tf32: false
+bf16: true
+fp16:
+tf32:
 gradient_checkpointing: true
 gradient_checkpointing_kwargs:
   use_reentrant: false
@@ -50,8 +50,8 @@ early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
 logging_steps: 1
-xformers_attention: false
-flash_attention: false
+xformers_attention:
+flash_attention: true
 
 warmup_ratio: 0.1
 evals_per_epoch:
diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py
index a9c1a462d..aa3b9a7cb 100755
--- a/src/axolotl/core/trainer_builder.py
+++ b/src/axolotl/core/trainer_builder.py
@@ -1970,7 +1970,10 @@ def build(self, total_num_steps):
             trainer_kwargs["processing_class"] = self.tokenizer
         else:
             trainer_kwargs["tokenizer"] = self.tokenizer
-        if not (trainer_cls in [AxolotlRewardTrainer, AxolotlPRMTrainer]) and self.cfg.datasets is not None:
+        if (
+            not (trainer_cls in [AxolotlRewardTrainer, AxolotlPRMTrainer])
+            and self.cfg.datasets is not None
+        ):
             trainer_kwargs["dataset_tags"] = [
                 d["path"] for d in self.cfg.datasets if not Path(d["path"]).is_dir()
             ]
diff --git a/src/axolotl/prompt_strategies/stepwise_supervised.py b/src/axolotl/prompt_strategies/stepwise_supervised.py
index 399e3a8fc..2e89328ab 100644
--- a/src/axolotl/prompt_strategies/stepwise_supervised.py
+++ b/src/axolotl/prompt_strategies/stepwise_supervised.py
@@ -4,13 +4,11 @@
 """
 
 from itertools import chain
-
-from typing import Dict, Generator, List, Optional, Union
+from typing import Dict, List, Optional, Union
 
 from transformers import BatchEncoding, PreTrainedTokenizer
 
-from axolotl.prompt_tokenizers import IGNORE_INDEX, PromptTokenizingStrategy
-from axolotl.prompters import Prompter
+from axolotl.prompt_tokenizers import IGNORE_INDEX
 from axolotl.utils.dict import DictDefault
 
 
@@ -55,7 +53,9 @@ def tokenize_prompt(
 
         # Handle labels
         if self.train_on_last_step_only:
-            labels = [-100] * (len(prompt["labels"]) - 1) + [int(prompt["labels"][-1])]
+            labels = [IGNORE_INDEX] * (len(prompt["labels"]) - 1) + [
+                int(prompt["labels"][-1])
+            ]
         else:
             labels = [int(label) for label in prompt["labels"]]
 
@@ -67,13 +67,13 @@
 
         # Create step-wise labels
         labels = [
-            [-100] * (len(completion) - 1) + [label]
+            [IGNORE_INDEX] * (len(completion) - 1) + [label]  # type: ignore
             for completion, label in zip(completions_ids, labels)
         ]
 
         # Join all steps
         completion_ids = list(chain(*completions_ids))
-        labels = list(chain(*labels))
+        labels = list(chain(*labels))  # type: ignore
 
         # Handle max lengths
         if self.max_completion_length:
@@ -86,7 +86,8 @@
 
         # Combine prompt and completion
         input_ids = prompt_ids + completion_ids
-        full_labels = [-100] * len(prompt_ids) + labels
+
+        full_labels = [IGNORE_INDEX] * len(prompt_ids) + labels
         # Apply max sequence length
         if self.sequence_len:
             input_ids = input_ids[: self.sequence_len]
diff --git a/src/axolotl/utils/data/sft.py b/src/axolotl/utils/data/sft.py
index 8a5eeff5a..3e784ca3e 100644
--- a/src/axolotl/utils/data/sft.py
+++ b/src/axolotl/utils/data/sft.py
@@ -6,9 +6,9 @@
 from typing import List, Tuple, Union
 
 from datasets import (
-    concatenate_datasets,
     Dataset,
     DatasetDict,
+    concatenate_datasets,
     load_dataset,
     load_from_disk,
 )