diff --git a/pyproject.toml b/pyproject.toml
index a29fba8b..4c475deb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"

 [tool.poetry]
 name = "together"
-version = "1.3.3"
+version = "1.3.4"
 authors = [
     "Together AI "
 ]
diff --git a/src/together/cli/api/finetune.py b/src/together/cli/api/finetune.py
index 3fdbd74b..bd509e60 100644
--- a/src/together/cli/api/finetune.py
+++ b/src/together/cli/api/finetune.py
@@ -11,8 +11,13 @@ from tabulate import tabulate

 from together import Together
-from together.cli.api.utils import INT_WITH_MAX
-from together.utils import finetune_price_to_dollars, log_warn, parse_timestamp
+from together.cli.api.utils import BOOL_WITH_AUTO, INT_WITH_MAX
+from together.utils import (
+    finetune_price_to_dollars,
+    log_warn,
+    log_warn_once,
+    parse_timestamp,
+)
 from together.types.finetune import DownloadCheckpointType, FinetuneTrainingLimits
@@ -93,6 +98,13 @@ def fine_tuning(ctx: click.Context) -> None:
     default=False,
     help="Whether to skip the launch confirmation message",
 )
+@click.option(
+    "--train-on-inputs",
+    type=BOOL_WITH_AUTO,
+    default="auto",
+    help="Whether to mask the user messages in conversational data or prompts in instruction data. "
+    "`auto` will automatically determine whether to mask the inputs based on the data format.",
+)
 def create(
     ctx: click.Context,
     training_file: str,
@@ -112,6 +124,7 @@ def create(
     suffix: str,
     wandb_api_key: str,
     confirm: bool,
+    train_on_inputs: bool | Literal["auto"],
 ) -> None:
     """Start fine-tuning"""
     client: Together = ctx.obj
@@ -133,6 +146,7 @@ def create(
         lora_trainable_modules=lora_trainable_modules,
         suffix=suffix,
         wandb_api_key=wandb_api_key,
+        train_on_inputs=train_on_inputs,
     )

     model_limits: FinetuneTrainingLimits = client.fine_tuning.get_model_limits(
@@ -150,6 +164,10 @@ def create(
             "batch_size": model_limits.lora_training.max_batch_size,
             "learning_rate": 1e-3,
         }
+        log_warn_once(
+            f"The default LoRA rank for {model} has been changed to {default_values['lora_r']} as the max available.\n"
+            f"Also, the default learning rate for LoRA fine-tuning has been changed to {default_values['learning_rate']}."
+        )
         for arg in default_values:
             arg_source = ctx.get_parameter_source(arg)  # type: ignore[attr-defined]
             if arg_source == ParameterSource.DEFAULT:
@@ -186,22 +204,7 @@ def create(

     if confirm or click.confirm(_CONFIRMATION_MESSAGE, default=True, show_default=True):
         response = client.fine_tuning.create(
-            training_file=training_file,
-            model=model,
-            n_epochs=n_epochs,
-            validation_file=validation_file,
-            n_evals=n_evals,
-            n_checkpoints=n_checkpoints,
-            batch_size=batch_size,
-            learning_rate=learning_rate,
-            warmup_ratio=warmup_ratio,
-            lora=lora,
-            lora_r=lora_r,
-            lora_dropout=lora_dropout,
-            lora_alpha=lora_alpha,
-            lora_trainable_modules=lora_trainable_modules,
-            suffix=suffix,
-            wandb_api_key=wandb_api_key,
+            **training_args,
             verbose=True,
         )
diff --git a/src/together/cli/api/utils.py b/src/together/cli/api/utils.py
index 3f85f380..08dfe492 100644
--- a/src/together/cli/api/utils.py
+++ b/src/together/cli/api/utils.py
@@ -27,4 +27,25 @@ def convert(
             )


+class BooleanWithAutoParamType(click.ParamType):
+    name = "boolean_or_auto"
+
+    def convert(
+        self, value: str, param: click.Parameter | None, ctx: click.Context | None
+    ) -> bool | Literal["auto"] | None:
+        if value == "auto":
+            return "auto"
+        try:
+            return bool(value)
+        except ValueError:
+            self.fail(
+                _("{value!r} is not a valid {type}.").format(
+                    value=value, type=self.name
+                ),
+                param,
+                ctx,
+            )
+
+
 INT_WITH_MAX = AutoIntParamType()
+BOOL_WITH_AUTO = BooleanWithAutoParamType()
diff --git a/src/together/constants.py b/src/together/constants.py
index b4c9cf3b..c64af326 100644
--- a/src/together/constants.py
+++ b/src/together/constants.py
@@ -1,3 +1,5 @@
+import enum
+
 # Session constants
 TIMEOUT_SECS = 600
 MAX_SESSION_LIFETIME_SECS = 180
@@ -29,3 +31,20 @@

 # expected columns for Parquet files
 PARQUET_EXPECTED_COLUMNS = ["input_ids", "attention_mask", "labels"]
+
+
+class DatasetFormat(enum.Enum):
+    """Dataset format enum."""
+
+    GENERAL = "general"
+    CONVERSATION = "conversation"
+    INSTRUCTION = "instruction"
+
+
+JSONL_REQUIRED_COLUMNS_MAP = {
+    DatasetFormat.GENERAL: ["text"],
+    DatasetFormat.CONVERSATION: ["messages"],
+    DatasetFormat.INSTRUCTION: ["prompt", "completion"],
+}
+REQUIRED_COLUMNS_MESSAGE = ["role", "content"]
+POSSIBLE_ROLES_CONVERSATION = ["system", "user", "assistant"]
diff --git a/src/together/resources/finetune.py b/src/together/resources/finetune.py
index 44d74f2b..79596dd2 100644
--- a/src/together/resources/finetune.py
+++ b/src/together/resources/finetune.py
@@ -43,6 +43,7 @@ def createFinetuneRequest(
     lora_trainable_modules: str | None = "all-linear",
     suffix: str | None = None,
     wandb_api_key: str | None = None,
+    train_on_inputs: bool | Literal["auto"] = "auto",
 ) -> FinetuneRequest:
     if batch_size == "max":
         log_warn_once(
@@ -95,6 +96,7 @@ def createFinetuneRequest(
         training_type=training_type,
         suffix=suffix,
         wandb_key=wandb_api_key,
+        train_on_inputs=train_on_inputs,
     )

     return finetune_request
@@ -125,6 +127,7 @@ def create(
         wandb_api_key: str | None = None,
         verbose: bool = False,
         model_limits: FinetuneTrainingLimits | None = None,
+        train_on_inputs: bool | Literal["auto"] = "auto",
     ) -> FinetuneResponse:
         """
         Method to initiate a fine-tuning job
@@ -137,7 +140,7 @@ def create(
             n_evals (int, optional): Number of evaluation loops to run. Defaults to 0.
             n_checkpoints (int, optional): Number of checkpoints to save during fine-tuning.
                 Defaults to 1.
-            batch_size (int, optional): Batch size for fine-tuning. Defaults to max.
+            batch_size (int or "max"): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
             warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
@@ -154,6 +157,12 @@ def create(
                 Defaults to False.
             model_limits (FinetuneTrainingLimits, optional): Limits for the hyperparameters
                 the model in Fine-tuning. Defaults to None.
+            train_on_inputs (bool or "auto"): Whether to mask the user messages in conversational data or prompts in instruction data.
+                "auto" will automatically determine whether to mask the inputs based on the data format.
+                For datasets with the "text" field (general format), inputs will not be masked.
+                For datasets with the "messages" field (conversational format) or "prompt" and "completion" fields
+                (instruction format), inputs will be masked.
+                Defaults to "auto".

         Returns:
             FinetuneResponse: Object containing information about fine-tuning job.
@@ -184,6 +193,7 @@ def create(
             lora_trainable_modules=lora_trainable_modules,
             suffix=suffix,
             wandb_api_key=wandb_api_key,
+            train_on_inputs=train_on_inputs,
         )

         if verbose:
@@ -436,6 +446,7 @@ async def create(
         wandb_api_key: str | None = None,
         verbose: bool = False,
         model_limits: FinetuneTrainingLimits | None = None,
+        train_on_inputs: bool | Literal["auto"] = "auto",
     ) -> FinetuneResponse:
         """
         Async method to initiate a fine-tuning job
@@ -465,6 +476,12 @@ async def create(
                 Defaults to False.
             model_limits (FinetuneTrainingLimits, optional): Limits for the hyperparameters
                 the model in Fine-tuning. Defaults to None.
+            train_on_inputs (bool or "auto"): Whether to mask the user messages in conversational data or prompts in instruction data.
+                "auto" will automatically determine whether to mask the inputs based on the data format.
+                For datasets with the "text" field (general format), inputs will not be masked.
+                For datasets with the "messages" field (conversational format) or "prompt" and "completion" fields
+                (instruction format), inputs will be masked.
+                Defaults to "auto".

         Returns:
             FinetuneResponse: Object containing information about fine-tuning job.
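
[Reviewer note] A minimal usage sketch of the new parameter as it lands in this diff. The file ID and model name below are illustrative, and a TOGETHER_API_KEY is assumed to be set in the environment:

    from together import Together

    client = Together()  # reads TOGETHER_API_KEY from the environment

    # train_on_inputs defaults to "auto": masking is inferred from the dataset
    # format (masked for conversational/instruction data, unmasked for "text" data).
    job = client.fine_tuning.create(
        training_file="file-abc123",  # illustrative file ID
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",  # illustrative model
        train_on_inputs="auto",
    )
    print(job.id)

The same switch is exposed on the CLI as --train-on-inputs, which accepts a boolean value or "auto" via the new BOOL_WITH_AUTO param type.
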
@@ -495,6 +512,7 @@ async def create(
             lora_trainable_modules=lora_trainable_modules,
             suffix=suffix,
             wandb_api_key=wandb_api_key,
+            train_on_inputs=train_on_inputs,
         )

         if verbose:
diff --git a/src/together/types/finetune.py b/src/together/types/finetune.py
index 2f76c446..f1fabb04 100644
--- a/src/together/types/finetune.py
+++ b/src/together/types/finetune.py
@@ -3,7 +3,7 @@ from enum import Enum
 from typing import List, Literal

-from pydantic import Field, validator, field_validator
+from pydantic import StrictBool, Field, validator, field_validator

 from together.types.abstract import BaseModel
 from together.types.common import (
@@ -163,6 +163,7 @@ class FinetuneRequest(BaseModel):
     # weights & biases api key
     wandb_key: str | None = None
     training_type: FullTrainingType | LoRATrainingType | None = None
+    train_on_inputs: StrictBool | Literal["auto"] = "auto"


 class FinetuneResponse(BaseModel):
@@ -230,6 +231,7 @@ class FinetuneResponse(BaseModel):
     # training file metadata
     training_file_num_lines: int | None = Field(None, alias="TrainingFileNumLines")
     training_file_size: int | None = Field(None, alias="TrainingFileSize")
+    train_on_inputs: StrictBool | Literal["auto"] | None = "auto"

     @field_validator("training_type")
     @classmethod
diff --git a/src/together/utils/files.py b/src/together/utils/files.py
index d063c053..7267ccbd 100644
--- a/src/together/utils/files.py
+++ b/src/together/utils/files.py
@@ -13,9 +13,28 @@
     MIN_SAMPLES,
     NUM_BYTES_IN_GB,
     PARQUET_EXPECTED_COLUMNS,
+    JSONL_REQUIRED_COLUMNS_MAP,
+    REQUIRED_COLUMNS_MESSAGE,
+    POSSIBLE_ROLES_CONVERSATION,
+    DatasetFormat,
 )


+class InvalidFileFormatError(ValueError):
+    """Exception raised for invalid file formats during file checks."""
+
+    def __init__(
+        self,
+        message: str = "",
+        line_number: int | None = None,
+        error_source: str | None = None,
+    ) -> None:
+        super().__init__(message)
+        self.message = message
+        self.line_number = line_number
+        self.error_source = error_source
+
+
 def check_file(
     file: Path | str,
 ) -> Dict[str, Any]:
@@ -31,7 +50,7 @@ def check_file(
         "line_type": None,
         "text_field": None,
         "key_value": None,
-        "min_samples": None,
+        "has_min_samples": None,
         "num_samples": None,
         "load_json": None,
     }
@@ -90,43 +109,132 @@ def _check_jsonl(file: Path) -> Dict[str, Any]:
         report_dict["is_check_passed"] = False
         return report_dict

+    dataset_format = None
     with file.open() as f:
-        # idx must be instantiated so decode errors (e.g. file is a tar) or empty files are caught
         idx = -1
         try:
             for idx, line in enumerate(f):
-                json_line = json.loads(line)  # each line in jsonlines should be a json
+                json_line = json.loads(line)

                 if not isinstance(json_line, dict):
-                    report_dict["line_type"] = False
-                    report_dict["message"] = (
-                        f"Error parsing file. Invalid format on line {idx + 1} of the input file. "
-                        'Example of valid json: {"text": "my sample string"}. '
+                    raise InvalidFileFormatError(
+                        message=(
+                            f"Error parsing file. Invalid format on line {idx + 1} of the input file. "
+                            'Example of valid json: {"text": "my sample string"}. '
+                        ),
+                        line_number=idx + 1,
+                        error_source="line_type",
                     )
-                    report_dict["is_check_passed"] = False

+                current_format = None
+                for possible_format in JSONL_REQUIRED_COLUMNS_MAP:
+                    if all(
+                        column in json_line
+                        for column in JSONL_REQUIRED_COLUMNS_MAP[possible_format]
+                    ):
+                        if current_format is None:
+                            current_format = possible_format
+                        elif current_format != possible_format:
+                            raise InvalidFileFormatError(
+                                message="Found multiple dataset formats in the input file. "
" + f"Got {current_format} and {possible_format} on line {idx + 1}.", + line_number=idx + 1, + error_source="format", + ) - if "text" not in json_line.keys(): - report_dict["text_field"] = False - report_dict["message"] = ( - f"Missing 'text' field was found on line {idx + 1} of the the input file. " - "Expected format: {'text': 'my sample string'}. " + if current_format is None: + raise InvalidFileFormatError( + message=( + f"Error parsing file. Could not detect a format for the line {idx + 1} with the columns:\n" + f"{json_line.keys()}" + ), + line_number=idx + 1, + error_source="format", ) - report_dict["is_check_passed"] = False - else: - # check to make sure the value of the "text" key is a string - if not isinstance(json_line["text"], str): - report_dict["key_value"] = False - report_dict["message"] = ( - f'Invalid value type for "text" key on line {idx + 1}. ' - f'Expected string. Found {type(json_line["text"])}.' + + if current_format == DatasetFormat.CONVERSATION: + message_column = JSONL_REQUIRED_COLUMNS_MAP[ + DatasetFormat.CONVERSATION + ][0] + if not isinstance(json_line[message_column], list): + raise InvalidFileFormatError( + message=f"Invalid format on line {idx + 1} of the input file. " + f"Expected a list of messages. Found {type(json_line[message_column])}", + line_number=idx + 1, + error_source="key_value", ) - report_dict["is_check_passed"] = False + for turn_id, turn in enumerate(json_line[message_column]): + if not isinstance(turn, dict): + raise InvalidFileFormatError( + message=f"Invalid format on line {idx + 1} of the input file. " + f"Expected a dictionary in the {turn_id + 1} turn. Found {type(turn)}", + line_number=idx + 1, + error_source="key_value", + ) + + previous_role = None + for turn in json_line[message_column]: + for column in REQUIRED_COLUMNS_MESSAGE: + if column not in turn: + raise InvalidFileFormatError( + message=f"Field `{column}` is missing for a turn `{turn}` on line {idx + 1} " + "of the the input file.", + line_number=idx + 1, + error_source="key_value", + ) + else: + if not isinstance(turn[column], str): + raise InvalidFileFormatError( + message=f"Invalid format on line {idx + 1} in the column {column} for turn `{turn}` " + f"of the input file. Expected string. Found {type(turn[column])}", + line_number=idx + 1, + error_source="text_field", + ) + role = turn["role"] + + if role not in POSSIBLE_ROLES_CONVERSATION: + raise InvalidFileFormatError( + message=f"Found invalid role `{role}` in the messages on the line {idx + 1}. " + f"Possible roles in the conversation are: {POSSIBLE_ROLES_CONVERSATION}", + line_number=idx + 1, + error_source="key_value", + ) + + if previous_role == role: + raise InvalidFileFormatError( + message=f"Invalid role turns on line {idx + 1} of the input file. " + "`user` and `assistant` roles must alternate user/assistant/user/assistant/...", + line_number=idx + 1, + error_source="key_value", + ) + + previous_role = role + + else: + for column in JSONL_REQUIRED_COLUMNS_MAP[current_format]: + if not isinstance(json_line[column], str): + raise InvalidFileFormatError( + message=f'Invalid value type for "{column}" key on line {idx + 1}. ' + f"Expected string. Found {type(json_line[column])}.", + line_number=idx + 1, + error_source="key_value", + ) + + if dataset_format is None: + dataset_format = current_format + elif current_format is not None: + if current_format != dataset_format: + raise InvalidFileFormatError( + message="All samples in the dataset must have the same dataset format. 
" + f"Got {dataset_format} for the first line and {current_format} " + f"for the line {idx + 1}.", + line_number=idx + 1, + error_source="format", + ) - # make sure this is outside the for idx, line in enumerate(f): for loop if idx + 1 < MIN_SAMPLES: - report_dict["min_samples"] = False + report_dict["has_min_samples"] = False report_dict["message"] = ( f"Processing {file} resulted in only {idx + 1} samples. " f"Our minimum is {MIN_SAMPLES} samples. " @@ -134,10 +242,19 @@ def _check_jsonl(file: Path) -> Dict[str, Any]: report_dict["is_check_passed"] = False else: report_dict["num_samples"] = idx + 1 - report_dict["min_samples"] = True + report_dict["has_min_samples"] = True + report_dict["is_check_passed"] = True report_dict["load_json"] = True + except InvalidFileFormatError as e: + report_dict["load_json"] = False + report_dict["is_check_passed"] = False + report_dict["message"] = e.message + if e.line_number is not None: + report_dict["line_number"] = e.line_number + if e.error_source is not None: + report_dict[e.error_source] = False except ValueError: report_dict["load_json"] = False if idx < 0: @@ -192,7 +309,8 @@ def _check_parquet(file: Path) -> Dict[str, Any]: num_samples = len(table) if num_samples < MIN_SAMPLES: - report_dict["min_samples"] = ( + report_dict["has_min_samples"] = False + report_dict["message"] = ( f"Processing {file} resulted in only {num_samples} samples. " f"Our minimum is {MIN_SAMPLES} samples. " ) diff --git a/tests/unit/test_files_checks.py b/tests/unit/test_files_checks.py new file mode 100644 index 00000000..65f59f61 --- /dev/null +++ b/tests/unit/test_files_checks.py @@ -0,0 +1,281 @@ +import json +import pytest +from pathlib import Path + +from together.constants import MIN_SAMPLES +from together.utils.files import check_file + + +def test_check_jsonl_valid_general(tmp_path: Path): + # Create a valid JSONL file + file = tmp_path / "valid.jsonl" + content = [{"text": "Hello, world!"}, {"text": "How are you?"}] + with file.open("w") as f: + f.write("\n".join(json.dumps(item) for item in content)) + + report = check_file(file) + + assert report["is_check_passed"] + assert report["utf8"] + assert report["num_samples"] == len(content) + assert report["has_min_samples"] + + +def test_check_jsonl_valid_instruction(tmp_path: Path): + # Create a valid JSONL file with instruction format + file = tmp_path / "valid_instruction.jsonl" + content = [ + {"prompt": "Translate the following sentence.", "completion": "Hello, world!"}, + { + "prompt": "Summarize the text.", + "completion": "Weyland-Yutani Corporation creates advanced AI.", + }, + ] + with file.open("w") as f: + f.write("\n".join(json.dumps(item) for item in content)) + + report = check_file(file) + + assert report["is_check_passed"] + assert report["utf8"] + assert report["num_samples"] == len(content) + assert report["has_min_samples"] + + +def test_check_jsonl_valid_conversational_single_turn(tmp_path: Path): + # Create a valid JSONL file with conversational format and 1 user-assistant turn pair + file = tmp_path / "valid_conversational_single_turn.jsonl" + content = [ + { + "messages": [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + ] + }, + { + "messages": [ + {"role": "user", "content": "How are you?"}, + {"role": "assistant", "content": "I am fine."}, + ] + }, + { + "messages": [ + {"role": "system", "content": "You are a kind AI"}, + {"role": "user", "content": "How are you?"}, + {"role": "assistant", "content": "I am fine."}, + ] + }, + ] + with 
file.open("w") as f: + f.write("\n".join(json.dumps(item) for item in content)) + + report = check_file(file) + + assert report["is_check_passed"] + assert report["utf8"] + assert report["num_samples"] == len(content) + assert report["has_min_samples"] + + +def test_check_jsonl_valid_conversational_multiple_turns(tmp_path: Path): + # Create a valid JSONL file with conversational format and multiple user-assistant turn pairs + file = tmp_path / "valid_conversational_multiple_turns.jsonl" + content = [ + { + "messages": [ + {"role": "user", "content": "Is it going to rain today?"}, + { + "role": "assistant", + "content": "Yes, expect showers in the afternoon.", + }, + {"role": "user", "content": "What is the weather like in Tokyo?"}, + {"role": "assistant", "content": "It is sunny with a chance of rain."}, + ] + }, + { + "messages": [ + {"role": "user", "content": "Who won the game last night?"}, + {"role": "assistant", "content": "The home team won by two points."}, + {"role": "user", "content": "What is the weather like in Amsterdam?"}, + {"role": "assistant", "content": "It is cloudy with a chance of snow."}, + ] + }, + { + "messages": [ + {"role": "system", "content": "You are a kind AI"}, + {"role": "user", "content": "Who won the game last night?"}, + {"role": "assistant", "content": "The home team won by two points."}, + {"role": "user", "content": "What is the weather like in Amsterdam?"}, + {"role": "assistant", "content": "It is cloudy with a chance of snow."}, + ] + }, + ] + with file.open("w") as f: + f.write("\n".join(json.dumps(item) for item in content)) + + report = check_file(file) + + assert report["is_check_passed"] + assert report["utf8"] + assert report["num_samples"] == len(content) + assert report["has_min_samples"] + + +def test_check_jsonl_empty_file(tmp_path: Path): + # Create an empty JSONL file + file = tmp_path / "empty.jsonl" + file.touch() + + report = check_file(file) + + assert not report["is_check_passed"] + assert report["message"] == "File is empty" + assert report["file_size"] == 0 + + +def test_check_jsonl_non_utf8(tmp_path: Path): + # Create a non-UTF-8 encoded JSONL file + file = tmp_path / "non_utf8.jsonl" + file.write_bytes(b"\xff\xfe\xfd") + + report = check_file(file) + + assert not report["is_check_passed"] + assert not report["utf8"] + assert "File is not UTF-8 encoded." in report["message"] + + +def test_check_jsonl_invalid_json(tmp_path: Path): + # Create a JSONL file with invalid JSON + file = tmp_path / "invalid_json.jsonl" + content = [{"text": "Hello, world!"}, "Invalid JSON Line"] + with file.open("w") as f: + f.write("\n".join(json.dumps(item) for item in content)) + + report = check_file(file) + + assert not report["is_check_passed"] + assert "Error parsing file." in report["message"] + + +def test_check_jsonl_missing_required_field(tmp_path: Path): + # Create a JSONL file missing a required field + file = tmp_path / "missing_field.jsonl" + content = [ + {"prompt": "Translate the following sentence.", "completion": "Hello, world!"}, + {"prompt": "Summarize the text."}, + ] + with file.open("w") as f: + f.write("\n".join(json.dumps(item) for item in content)) + + report = check_file(file) + + assert not report["is_check_passed"] + assert ( + "Error parsing file. 
+        "Error parsing file. Could not detect a format for the line 2"
+        in report["message"]
+    )
+
+
+def test_check_jsonl_inconsistent_dataset_format(tmp_path: Path):
+    # Create a JSONL file with inconsistent dataset formats
+    file = tmp_path / "inconsistent_format.jsonl"
+    content = [
+        {"messages": [{"role": "user", "content": "Hi"}]},
+        {"text": "How are you?"},  # Missing 'messages'
+    ]
+    with file.open("w") as f:
+        f.write("\n".join(json.dumps(item) for item in content))
+
+    report = check_file(file)
+
+    assert not report["is_check_passed"]
+    assert (
+        "All samples in the dataset must have the same dataset format"
+        in report["message"]
+    )
+
+
+def test_check_jsonl_invalid_role(tmp_path: Path):
+    # Create a JSONL file with an invalid role
+    file = tmp_path / "invalid_role.jsonl"
+    content = [{"messages": [{"role": "invalid_role", "content": "Hi"}]}]
+    with file.open("w") as f:
+        f.write("\n".join(json.dumps(item) for item in content))
+
+    report = check_file(file)
+
+    assert not report["is_check_passed"]
+    assert "Found invalid role `invalid_role`" in report["message"]
+
+
+def test_check_jsonl_non_alternating_roles(tmp_path: Path):
+    # Create a JSONL file with non-alternating user/assistant roles
+    file = tmp_path / "non_alternating_roles.jsonl"
+    content = [
+        {
+            "messages": [
+                {"role": "user", "content": "Hi"},
+                {"role": "user", "content": "Hello again"},
+            ]
+        }
+    ]
+    with file.open("w") as f:
+        f.write("\n".join(json.dumps(item) for item in content))
+
+    report = check_file(file)
+
+    assert not report["is_check_passed"]
+    assert "Invalid role turns" in report["message"]
+
+
+def test_check_jsonl_invalid_value_type(tmp_path: Path):
+    # Create a JSONL file with an invalid value type
+    file = tmp_path / "invalid_value_type.jsonl"
+    content = [{"text": 123}]
+    with file.open("w") as f:
+        f.write("\n".join(json.dumps(item) for item in content))
+
+    report = check_file(file)
+
+    assert not report["is_check_passed"]
+    assert "Expected string" in report["message"]
+
+
+def test_check_jsonl_missing_field_in_conversation(tmp_path: Path):
+    file = tmp_path / "missing_field_in_conversation.jsonl"
+    content = [
+        {
+            "messages": [
+                {"role": "user", "content": "Hi"},
+                {"role": "assistant"},
+            ]
+        }
+    ]
+    with file.open("w") as f:
+        f.write("\n".join(json.dumps(item) for item in content))
+
+    report = check_file(file)
+
+    assert not report["is_check_passed"]
+    assert "Field `content` is missing for a turn" in report["message"]
+
+
+def test_check_jsonl_wrong_turn_type(tmp_path: Path):
+    file = tmp_path / "wrong_turn_type.jsonl"
+    content = [
+        {
+            "messages": [
+                "Hi!",
+                {"role": "user", "content": "Hi"},
+                {"role": "assistant"},
+            ]
+        }
+    ]
+    with file.open("w") as f:
+        f.write("\n".join(json.dumps(item) for item in content))
+
+    report = check_file(file)
+
+    assert not report["is_check_passed"]
+    assert (
+        "Invalid format on line 1 of the input file. Expected a dictionary"
+        in report["message"]
+    )
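
[Reviewer note] For completeness, a minimal sketch of how the reworked validation surfaces to callers, mirroring the tests above. The file path is illustrative; the report keys are the ones set by _check_jsonl in this diff:

    import json
    from pathlib import Path

    from together.utils.files import check_file

    # One conversational sample: an optional system turn followed by
    # alternating user/assistant turns, per POSSIBLE_ROLES_CONVERSATION.
    sample = {
        "messages": [
            {"role": "system", "content": "You are a kind AI"},
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there!"},
        ]
    }

    path = Path("sample.jsonl")  # illustrative path
    with path.open("w") as f:
        f.write(json.dumps(sample))

    report = check_file(path)
    # On failure, report["message"] carries the InvalidFileFormatError text, and
    # report["line_number"] plus the failing error_source key are populated.
    # On success, num_samples, has_min_samples, and is_check_passed are set.
    print(report["is_check_passed"], report.get("message"))
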