diff --git a/docs/source/conf.py b/docs/source/conf.py
index f23d2ff8c79d37..6659cf6e0550fc 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -339,6 +339,8 @@
     ("py:class", "mlflow.types.schema.ColSpec"),
     ("py:class", "mlflow.types.schema.TensorSpec"),
     ("py:class", "mlflow.types.schema.Schema"),
+    ("py:class", "mlflow.types.schema.ParamSchema"),
+    ("py:class", "mlflow.types.schema.ParamSpec"),
     ("py:class", "mlflow.models.model.Model"),
     ("py:class", "mlflow.models.signature.ModelSignature"),
     ("py:class", "MlflowInferableDataset"),
diff --git a/docs/source/model-registry.rst b/docs/source/model-registry.rst
index 718cc65674a4e6..34bb566b6337fd 100644
--- a/docs/source/model-registry.rst
+++ b/docs/source/model-registry.rst
@@ -564,7 +564,7 @@ save, log, register, and load from the Model Registry and score.
             prediction_scores = self._analyser.polarity_scores(txt)
             return prediction_scores
 
-        def predict(self, context, model_input):
+        def predict(self, context, model_input, params=None):
             # Apply the preprocess function from the vader model to score
             model_output = model_input.apply(lambda col: self._score(col))
             return model_output
diff --git a/docs/source/models.rst b/docs/source/models.rst
index 2d3a3b37855e0b..e6c356440c931c 100644
--- a/docs/source/models.rst
+++ b/docs/source/models.rst
@@ -204,19 +204,90 @@ Model Signature And Input Example
 ---------------------------------
 When working with ML models you often need to know some basic functional properties of the model
 at hand, such as "What inputs does it expect?" and "What output does it produce?". MLflow models can
-include the following additional metadata about model inputs and outputs that can be used by
+include the following additional metadata about model inputs, outputs and params that can be used by
 downstream tooling:
 
-* :ref:`Model Signature <model-signature>` - description of a model's inputs and outputs.
+* :ref:`Model Inference Params <inference-params>` - description of params used for model inference.
+* :ref:`Model Signature <model-signature>` - description of a model's inputs, outputs and parameters.
 * :ref:`Model Input Example <input-example>` - example of a valid model input.
 
+.. _inference-params:
+
+Model Inference Params
+^^^^^^^^^^^^^^^^^^^^^^
+Inference params are parameters that are passed to the model at inference time. These parameters
+do not need to be specified when training the model, but can be useful at inference time. With the
+advances in foundation models, an "inference configuration" is increasingly used to modify the
+behavior of a model. In some cases, especially for popular LLMs, the same model may require
+different parameter configurations for different samples at inference time.
+
+With this newly introduced feature, you can specify a dictionary of inference params during
+model inference, providing broader utility and improved control over the generated inference
+results, particularly for LLM use cases. By passing different params such as ``temperature``,
+``max_length``, etc. to the model at inference time, you can easily control the output of the model.
+
+In order to use params at inference time, a valid :ref:`Model Signature <model-signature>` with
+``params`` must be defined. The params are passed to the model at inference time as a dictionary,
+and each param value is validated against the corresponding param type defined in the model
+signature. Valid param types are a ``DataType`` or a list of ``DataType``, as listed below.
+
+* DataType.string or an array of DataType.string
+* DataType.integer or an array of DataType.integer
+* DataType.boolean or an array of DataType.boolean
+* DataType.double or an array of DataType.double
+* DataType.float or an array of DataType.float
+* DataType.long or an array of DataType.long
+* DataType.datetime or an array of DataType.datetime
+
+.. note::
+    When validating param values, the values will be converted to Python native types.
+    For example, ``np.float32(0.1)`` will be converted to ``float(0.1)``.
+
+A simple example of using params for model inference:
+
+.. code-block:: python
+
+    import mlflow
+    from mlflow.models import infer_signature
+
+
+    class MyModel(mlflow.pyfunc.PythonModel):
+        def predict(self, ctx, model_input, params):
+            return list(params.values())
+
+
+    params = {"str_param": "string", "int_array": [1, 2, 3]}
+    # params' default values are saved with ModelSignature
+    signature = infer_signature(["input"], params=params)
+
+    with mlflow.start_run():
+        model_info = mlflow.pyfunc.log_model(
+            python_model=MyModel(), artifact_path="my_model", signature=signature
+        )
+
+    loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)
+
+    # Not passing params -- predict with default values
+    loaded_predict = loaded_model.predict(["input"])
+    assert loaded_predict == ["string", [1, 2, 3]]
+
+    # Passing some params -- add default values
+    loaded_predict = loaded_model.predict(["input"], params={"str_param": "new_string"})
+    assert loaded_predict == ["new_string", [1, 2, 3]]
+
+    # Passing all params -- override
+    loaded_predict = loaded_model.predict(
+        ["input"], params={"str_param": "new_string", "int_array": [4, 5, 6]}
+    )
+    assert loaded_predict == ["new_string", [4, 5, 6]]
+
 .. _model-signature:
 
 Model Signature
 ^^^^^^^^^^^^^^^
-Model signatures define input and output schemas for MLflow models, providing a standard
+Model signatures define input, output and parameter schemas for MLflow models, providing a standard
 interface to codify and enforce the correct use of your models. Signatures are fetched by the MLflow Tracking
-UI and Model Registry UI to display model inputs and outputs. They are also utilized by
+UI and Model Registry UI to display model inputs, outputs and params. They are also utilized by
 :ref:`MLflow model deployment tools <built-in-deployment>` to validate inference inputs according to the model's
 assigned signature (see the :ref:`Signature enforcement <signature-enforcement>` section for more
 details).
@@ -236,7 +307,11 @@ A model signature consists on inputs and outputs schemas, each of which can be e
 Column-based schemas are a sequence of (optionally) named columns with type specified as one of the
 :py:class:`MLflow data types <mlflow.types.DataType>`. Tensor-based schemas are a sequence of
 (optionally) named tensors with type specified as one of the
-`numpy data types <https://numpy.org/devdocs/user/basics.types.html>`_. See some examples of constructing them below.
+`numpy data types <https://numpy.org/devdocs/user/basics.types.html>`_.
+A params schema is a sequence of ``ParamSpec`` entries, each of which contains ``name``, ``type``, ``default`` and ``shape`` fields.
+The ``type`` field must be specified as one of the :py:class:`MLflow data types <mlflow.types.DataType>`, and the ``shape``
+field should be ``None`` for scalar parameters, or ``(-1,)`` for list parameters.
+See some examples of constructing them below.
 
 Column-based Signature Example
 """"""""""""""""""""""""""""""
@@ -255,6 +330,7 @@ The output is an unnamed integer specifying the predicted class.
 
 .. code-block:: yaml
 
     signature:
         inputs: '[{"name": "sepal length (cm)", "type": "double"}, {"name": "sepal width
(cm)", "type": "double"}, {"name": "petal length (cm)", "type": "double"}, {"name": "petal width (cm)", "type": "double"}, {"name": "class", "type": "string", "optional": "true"}]' outputs: '[{"type": "integer"}]' + params: null Tensor-based Signature Example """""""""""""""""""""""""""""" @@ -274,14 +350,33 @@ and the output is the batch size and is thus set to -1 to allow for variable bat signature: inputs: '[{"name": "images", "dtype": "uint8", "shape": [-1, 28, 28, 1]}]' outputs: '[{"shape": [-1, 10], "dtype": "float32"}]' + params: null + +Signature with params Example +""""""""""""""""""""""""""""" +The params field is optional and is used to specify parameters that can be used for model inference. +Params accept scalar values of type :py:class:`MLflow data types `, or a list +of such values. The default value of a parameter is specified by setting the ``default`` field, and the value +should be of the type specified by ``type`` field. The ``shape`` field can be used to specify the shape +of the value, it should be ``None`` for scalar values and ``(-1,)`` for a list. + +.. code-block:: yaml + + signature: + inputs: '[{"name": "text", "type": "string"}]' + outputs: '[{"name": "output", "type": "string"}]' + params: '[{"name": "temperature", "type": "float", "default": 0.5, "shape": null}, + {"name": "top_k", "type": "integer", "default": 1, "shape": null}, + {"name": "suppress_tokens", "type": "integer", "default": [101, 102], "shape": [-1]}]' .. _signature-enforcement: Signature Enforcement ~~~~~~~~~~~~~~~~~~~~~ -Schema enforcement checks the provided input against the model's signature -and raises an exception if the input is not compatible. This enforcement is applied in MLflow before -calling the underlying model implementation. Note that this enforcement only applies when using :ref:`MLflow +Schema enforcement checks the provided input and params against the model's signature and +raises an exception if the input or params is not compatible. This enforcement is applied in MLflow before +calling the underlying model implementation, and during model inference process. +Note that this enforcement only applies when using :ref:`MLflow model deployment tools ` or when loading models as ``python_function``. In particular, it is not applied to models that are loaded in their native format (e.g. by calling :py:func:`mlflow.sklearn.load_model() `). @@ -307,6 +402,17 @@ be made compatible, MLflow will raise an error. For models with tensor-based signatures, type checking is strict (i.e an exception will be thrown if the input type does not match the type specified by the schema). +Params Type and Shape Enforcement +""""""""""""""""""""""""""""""""" +The params types and shapes are checked against the signature. + +MLflow verifies the compatibility of each parameter provided during inference by comparing its type and shape +with those specified in the signature. Scalar values should have a shape of ``None``, while list values should have +a shape of ``(-1,)``. If the parameter's type or shape is incompatible, an exception will be raised. +Additionally, the value of the parameter is validated against the specified type in the signature. We attempt +to convert the value to the specified type, and if this conversion fails, an MlflowException will be raised. +A valid list of params is documented in :ref:`Model Inference Params ` section. + Handling Integers With Missing Values """"""""""""""""""""""""""""""""""""" Integer data with missing values is typically represented as floats in Python. 
@@ -343,14 +449,16 @@ How To Log Models With Signatures
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 To include a signature with your model, pass a :ref:`model input example <input-example>` to the
 appropriate log_model or save_model call, e.g. :py:func:`sklearn.log_model() <mlflow.sklearn.log_model>`,
-and the model signature will be automatically inferred from from the input example and the model's
+and the model signature will be automatically inferred from the input example and the model's
 predicted output of the input example.
 
 You may also include a signature object with your model by passing a
 :py:class:`signature object <mlflow.models.ModelSignature>` as an argument to your log_model or
 save_model call. The model signature object can be created by hand or
 :py:func:`inferred <mlflow.models.infer_signature>` from datasets with
-valid model inputs (e.g. the training dataset with target column omitted) and valid model outputs
-(e.g. model predictions generated on the training dataset).
+valid model inputs (e.g. the training dataset with target column omitted), valid model outputs
+(e.g. model predictions generated on the training dataset), and valid model parameters (a dictionary of
+parameters passed to the model for inference; e.g. `Generation Configs for transformers
+<https://huggingface.co/docs/transformers/main_classes/text_generation>`_).
 
 .. note:: Model signatures are utilized in :ref:`MLflow model deployment tools <built-in-deployment>`, which
@@ -464,6 +572,78 @@ The same signature can be explicitly created and logged as follows:
 
     with mlflow.start_run():
         mlflow.tensorflow.log_model(model, "mnist_cnn", signature=signature)
 
+
+Signature with params Example
+"""""""""""""""""""""""""""""
+The following example demonstrates how to store a model signature with params
+for a simple transformers model:
+
+.. code-block:: python
+
+    import mlflow
+    from mlflow.models import infer_signature
+    import transformers
+
+    architecture = "mrm8488/t5-base-finetuned-common_gen"
+    model = transformers.pipeline(
+        task="text2text-generation",
+        tokenizer=transformers.T5TokenizerFast.from_pretrained(architecture),
+        model=transformers.T5ForConditionalGeneration.from_pretrained(architecture),
+    )
+    data = "pencil draw paper"
+
+    params = {
+        "top_k": 2,
+        "num_beams": 5,
+        "max_length": 30,
+        "temperature": 0.62,
+        "top_p": 0.85,
+        "repetition_penalty": 1.15,
+        "begin_suppress_tokens": [1, 2, 3],
+    }
+
+    # infer signature with params
+    signature = infer_signature(
+        data,
+        mlflow.transformers.generate_signature_output(model, data),
+        params,
+    )
+
+    # save model with signature
+    mlflow.transformers.save_model(
+        model,
+        "text2text",
+        signature=signature,
+    )
+    pyfunc_loaded = mlflow.pyfunc.load_model("text2text")
+
+    # predict with params
+    result = pyfunc_loaded.predict(data, params=params)
+
+The same signature can be created explicitly as follows:
+
+.. code-block:: python
+
+    from mlflow.models import ModelSignature
+    from mlflow.types.schema import ColSpec, ParamSchema, ParamSpec, Schema
+
+    input_schema = Schema([ColSpec(type="string")])
+    output_schema = Schema([ColSpec(type="string")])
+    params_schema = ParamSchema(
+        [
+            ParamSpec("top_k", "long", 2),
+            ParamSpec("num_beams", "long", 5),
+            ParamSpec("max_length", "long", 30),
+            ParamSpec("temperature", "double", 0.62),
+            ParamSpec("top_p", "double", 0.85),
+            ParamSpec("repetition_penalty", "double", 1.15),
+            ParamSpec("begin_suppress_tokens", "long", [1, 2, 3], (-1,)),
+        ]
+    )
+    signature = ModelSignature(
+        inputs=input_schema, outputs=output_schema, params=params_schema
+    )
+
 .. _how-to-set-signatures-on-models:
 
 How To Set Signatures on Models
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -703,7 +883,8 @@ automatic dependency management).
 Once loaded, you can score the model by calling the :py:func:`predict <mlflow.pyfunc.PyFuncModel.predict>`
 method, which has the following signature::
 
-    predict(model_input: [pandas.DataFrame, numpy.ndarray, Dict[str, np.ndarray]]) -> [numpy.ndarray | pandas.(Series | DataFrame)]
+    predict(data: Union[pandas.(Series | DataFrame), numpy.ndarray, csc_matrix, csr_matrix, List[Any], Dict[str, Any], str],
+            params: Optional[Dict[str, Any]] = None) → Union[pandas.(Series | DataFrame), numpy.ndarray, list, str]
 
 All PyFunc models will support `pandas.DataFrame` as an input. In addition to `pandas.DataFrame`,
 DL PyFunc models will also support tensor inputs in the form of `numpy.ndarrays`. To verify
@@ -2692,6 +2873,119 @@ avoid failed inference requests.
 
 \***** If using `pyfunc` in MLflow Model Serving for realtime inference, the raw audio in bytes format must be base64 encoded prior to submitting to the endpoint. String inputs will be interpreted as uri locations.
 
+Using inference_config and model signature params for `transformers` inference
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+For `transformers` inference, there are two ways to pass in additional arguments to the pipeline.
+
+* Use ``inference_config`` when saving/logging the model
+* Specify params at inference time when calling ``predict()``
+
+.. note::
+    Model signature params passed to the ``predict`` function will override the values in ``inference_config``.
+
+* Using ``inference_config``
+
+.. code-block:: python
+
+    import mlflow
+    from mlflow.models import infer_signature
+    from mlflow.transformers import generate_signature_output
+    import transformers
+
+    architecture = "mrm8488/t5-base-finetuned-common_gen"
+    model = transformers.pipeline(
+        task="text2text-generation",
+        tokenizer=transformers.T5TokenizerFast.from_pretrained(architecture),
+        model=transformers.T5ForConditionalGeneration.from_pretrained(architecture),
+    )
+    data = "pencil draw paper"
+
+    # Infer the signature
+    signature = infer_signature(
+        data,
+        generate_signature_output(model, data),
+    )
+
+    # Define an inference_config
+    inference_config = {
+        "num_beams": 5,
+        "max_length": 30,
+        "do_sample": True,
+        "remove_invalid_values": True,
+    }
+
+    # Saving inference_config with the model
+    mlflow.transformers.save_model(
+        model,
+        path="text2text",
+        inference_config=inference_config,
+        signature=signature,
+    )
+
+    pyfunc_loaded = mlflow.pyfunc.load_model("text2text")
+    # inference_config will be applied
+    result = pyfunc_loaded.predict(data)
+
+
+* Specifying params at inference time
+
+.. code-block:: python
+
+    import mlflow
+    from mlflow.models import infer_signature
+    from mlflow.transformers import generate_signature_output
+    import transformers
+
+    architecture = "mrm8488/t5-base-finetuned-common_gen"
+    model = transformers.pipeline(
+        task="text2text-generation",
+        tokenizer=transformers.T5TokenizerFast.from_pretrained(architecture),
+        model=transformers.T5ForConditionalGeneration.from_pretrained(architecture),
+    )
+    data = "pencil draw paper"
+
+    # Define an inference_config
+    inference_config = {
+        "num_beams": 5,
+        "max_length": 30,
+        "do_sample": True,
+        "remove_invalid_values": True,
+    }
+
+    # Infer the signature including params
+    signature_with_params = infer_signature(
+        data,
+        generate_signature_output(model, data),
+        params=inference_config,
+    )
+
+    # Saving model without inference_config
+    mlflow.transformers.save_model(
+        model,
+        path="text2text",
+        signature=signature_with_params,
+    )
+
+    pyfunc_loaded = mlflow.pyfunc.load_model("text2text")
+
+    # Pass params at inference time
+    params = {
+        "max_length": 20,
+        "do_sample": False,
+    }
+
+    # In this case we only override max_length and do_sample,
+    # other params will use the default values saved in the ModelSignature.
+    # The final params used for prediction are as follows:
+    # {
+    #     "num_beams": 5,
+    #     "max_length": 20,
+    #     "do_sample": False,
+    #     "remove_invalid_values": True,
+    # }
+    result = pyfunc_loaded.predict(data, params=params)
+
+
 Example of loading a transformers model as a python function
 """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 In the below example, a simple pre-trained model is used within a pipeline. After logging to MLflow, the pipeline is
@@ -3384,7 +3678,7 @@ instance of this model with ``n = 5`` in MLflow Model format. Finally, it loads
 
         def __init__(self, n):
             self.n = n
 
-        def predict(self, context, model_input):
+        def predict(self, context, model_input, params=None):
             return model_input.apply(lambda column: column + self.n)
 
@@ -3451,7 +3745,7 @@ evaluate test data.
             self.xgb_model = xgb.Booster()
             self.xgb_model.load_model(context.artifacts["xgb_model"])
 
-        def predict(self, context, model_input):
+        def predict(self, context, model_input, params=None):
             input_matrix = xgb.DMatrix(model_input.values)
             return self.xgb_model.predict(input_matrix)
 
@@ -3901,7 +4195,7 @@ docstrings.
         def __init__(self, sktime_model):
             self.sktime_model = sktime_model
 
-        def predict(self, dataframe) -> pd.DataFrame:
+        def predict(self, dataframe, params=None) -> pd.DataFrame:
             df_schema = dataframe.columns.values.tolist()
 
             if len(dataframe) > 1:
@@ -4175,6 +4469,11 @@ the type and encoding of the input data `_ where the provided inputs will be cast to Numpy arrays.
 
+The JSON input also has an optional field ``params`` that can be used to pass additional parameters.
+Valid parameter types are ``Union[DataType, List[DataType], None]``, where ``DataType`` is one of the
+:py:class:`MLflow data types <mlflow.types.DataType>`. In order to pass params, a valid
+:ref:`Model Signature <model-signature>` with ``params`` must be defined.
+
 .. note::
     Since JSON loses type information, MLflow will cast the JSON input to the input type specified
    in the model's schema if available. If your model is sensitive to input types, it is recommended that a schema is provided for the model to ensure that type mismatch errors do not occur at inference time.
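+For illustration, the same ``params`` field can also be sent from Python with the ``requests``
+library. This is a sketch only; it assumes a local scoring server listening on port 5000 and a
+model whose signature defines a ``max_answer_len`` param:
+
+.. code-block:: python
+
+    import requests
+
+    payload = {
+        "inputs": {
+            "question": ["What color is it?"],
+            "context": ["Some people said it was green but I know that it is pink."],
+        },
+        # "params" is validated against the params schema in the model signature
+        "params": {"max_answer_len": 10},
+    }
+    # requests sets the Content-Type: application/json header for json= payloads
+    response = requests.post("http://127.0.0.1:5000/invocations", json=payload)
+    print(response.json())
+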
@@ -4215,6 +4514,13 @@ Example requests: "inputs": {"a": ["s1", "s2", "s3"], "b": [1, 2, 3], "c": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]} }' + # inference with params + curl http://127.0.0.1:5000/invocations -H 'Content-Type: application/json' -d '{ + "inputs": {"question": ["What color is it?"], + "context": ["Some people said it was green but I know that it is pink."]}, + "params": {"max_answer_len": 10} + }' + For more information about serializing pandas DataFrames, see `pandas.DataFrame.to_json `_. diff --git a/docs/source/plugins.rst b/docs/source/plugins.rst index f651b7d39c9636..2881da9aa85714 100644 --- a/docs/source/plugins.rst +++ b/docs/source/plugins.rst @@ -311,7 +311,7 @@ To use Aliyun OSS as an artifact store, an OSS URI of the form ``oss:/// class Mod(mlflow.pyfunc.PythonModel): - def predict(self, ctx, inp): + def predict(self, ctx, inp, params=None): return 7 diff --git a/examples/flower_classifier/image_pyfunc.py b/examples/flower_classifier/image_pyfunc.py index 3f9455de2761ea..d98441a9712142 100644 --- a/examples/flower_classifier/image_pyfunc.py +++ b/examples/flower_classifier/image_pyfunc.py @@ -13,6 +13,7 @@ import pip import yaml import tensorflow as tf +from typing import Any, Dict, Optional import mlflow from mlflow.utils import PYTHON_VERSION @@ -54,13 +55,19 @@ def __init__(self, graph, session, model, image_dims, domain): probs_names = ["p({})".format(x) for x in domain] self._column_names = ["predicted_label", "predicted_label_id"] + probs_names - def predict(self, input): + def predict( + self, input, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): """ Generate predictions for the data. :param input: pandas.DataFrame with one column containing images to be scored. The image column must contain base64 encoded binary content of the image files. The image format must be supported by PIL (e.g. jpeg or png). + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. 
:return: pandas.DataFrame containing predictions with the following schema: Predicted class: string, diff --git a/examples/pyfunc/train.py b/examples/pyfunc/train.py index 7415530289b29f..f27d526148f508 100644 --- a/examples/pyfunc/train.py +++ b/examples/pyfunc/train.py @@ -1,4 +1,5 @@ import os +from typing import Any, Dict, Optional from sklearn.datasets import load_iris from sklearn.linear_model import LogisticRegression @@ -14,7 +15,9 @@ class CustomPredict(mlflow.pyfunc.PythonModel): def load_context(self, context): self.model = mlflow.sklearn.load_model(context.artifacts["custom_model"]) - def predict(self, context, model_input): + def predict( + self, context, model_input, params: Optional[Dict[str, Any]] = None + ): # pylint: disable=unused-argument prediction = self.model.predict(model_input) return iris_classes(prediction) diff --git a/examples/sktime/flavor.py b/examples/sktime/flavor.py index 4c49a88c5ca1fe..eaf320dbfbd263 100644 --- a/examples/sktime/flavor.py +++ b/examples/sktime/flavor.py @@ -88,6 +88,7 @@ ) from mlflow.utils.requirements_utils import _get_pinned_requirement from sktime.utils.multiindex import flatten_multiindex +from typing import Any, Dict, Optional FLAVOR_NAME = "sktime" @@ -468,7 +469,9 @@ class _SktimeModelWrapper: def __init__(self, sktime_model): self.sktime_model = sktime_model - def predict(self, dataframe) -> pd.DataFrame: + def predict( + self, dataframe, params: Optional[Dict[str, Any]] = None + ) -> pd.DataFrame: # pylint: disable=unused-argument df_schema = dataframe.columns.values.tolist() if len(dataframe) > 1: diff --git a/examples/transformers/simple.py b/examples/transformers/simple.py index ec3e8c82262c63..76891c5b9c810c 100644 --- a/examples/transformers/simple.py +++ b/examples/transformers/simple.py @@ -10,16 +10,18 @@ input_example = ["prompt 1", "prompt 2", "prompt 3"] +parameters = {"max_length": 512, "do_sample": True} + signature = mlflow.models.infer_signature( input_example, mlflow.transformers.generate_signature_output(generation_pipeline, input_example), + parameters, ) with mlflow.start_run() as run: model_info = mlflow.transformers.log_model( transformers_model=generation_pipeline, artifact_path="text_generator", - inference_config={"max_length": 512, "do_sample": True}, input_example=["prompt 1", "prompt 2", "prompt 3"], signature=signature, ) @@ -28,6 +30,8 @@ print( sentence_generator.predict( - ["tell me a story about rocks", "Tell me a joke about a dog that likes spaghetti"] + ["tell me a story about rocks", "Tell me a joke about a dog that likes spaghetti"], + # pass in additional parameters applied to the pipeline during inference + params=parameters, ) ) diff --git a/mlflow/catboost.py b/mlflow/catboost.py index 93c0a3e22eacff..36e9e48719a568 100644 --- a/mlflow/catboost.py +++ b/mlflow/catboost.py @@ -22,6 +22,7 @@ import os import yaml import contextlib +from typing import Any, Dict, Optional import mlflow from mlflow import pyfunc @@ -326,7 +327,18 @@ class _CatboostModelWrapper: def __init__(self, cb_model): self.cb_model = cb_model - def predict(self, dataframe): + def predict( + self, dataframe, params: Optional[Dict[str, Any]] = None + ): # pylint: disable=unused-argument + """ + :param dataframe: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. 
+ """ return self.cb_model.predict(dataframe) diff --git a/mlflow/diviner.py b/mlflow/diviner.py index 6bbe51d2a7b0c2..5bfa43beadc2e6 100644 --- a/mlflow/diviner.py +++ b/mlflow/diviner.py @@ -21,7 +21,7 @@ import pathlib import yaml import pandas as pd -from typing import Tuple, List +from typing import Any, Dict, List, Optional, Tuple import mlflow from mlflow import pyfunc from mlflow.environment_variables import MLFLOW_DFS_TMP @@ -450,7 +450,9 @@ class _DivinerModelWrapper: def __init__(self, diviner_model): self.diviner_model = diviner_model - def predict(self, dataframe) -> pd.DataFrame: + def predict( + self, dataframe, params: Optional[Dict[str, Any]] = None + ) -> pd.DataFrame: # pylint: disable=unused-argument """ A method that allows a pyfunc implementation of this flavor to generate forecasted values from the end of a trained Diviner model's training series per group. @@ -482,6 +484,11 @@ def predict(self, dataframe) -> pd.DataFrame: Will generate 30 days of forecasted values for each group that the model was trained on. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + :return: A Pandas DataFrame containing the forecasted values for each group key that was either trained or declared as a subset with a ``groups`` entry in the ``dataframe`` configuration argument. diff --git a/mlflow/fastai/__init__.py b/mlflow/fastai/__init__.py index 5c591a716e6eac..bb30b44c272d81 100644 --- a/mlflow/fastai/__init__.py +++ b/mlflow/fastai/__init__.py @@ -18,6 +18,7 @@ from pathlib import Path import pandas as pd import numpy as np +from typing import Any, Dict, Optional from mlflow import pyfunc from mlflow.models import Model, ModelSignature, ModelInputExample @@ -357,7 +358,18 @@ class _FastaiModelWrapper: def __init__(self, learner): self.learner = learner - def predict(self, dataframe): + def predict( + self, dataframe, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): + """ + :param dataframe: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ dl = self.learner.dls.test_dl(dataframe) preds, _ = self.learner.get_preds(dl=dl) return pd.Series(map(np.array, preds.numpy())).to_frame("predictions") diff --git a/mlflow/gluon/__init__.py b/mlflow/gluon/__init__.py index ca2f84474cbd2a..c34646507e053c 100644 --- a/mlflow/gluon/__init__.py +++ b/mlflow/gluon/__init__.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd import yaml +from typing import Any, Dict, Optional import mlflow from mlflow import pyfunc @@ -102,11 +103,18 @@ class _GluonModelWrapper: def __init__(self, gluon_model): self.gluon_model = gluon_model - def predict(self, data): + def predict( + self, data, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): """ :param data: Either a pandas DataFrame or a numpy array containing input array values. If the input is a DataFrame, it will be converted to an array first by a `ndarray = df.values`. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + :return: Model predictions. If the input is a pandas.DataFrame, the predictions are returned in a pandas.DataFrame. 
If the input is a numpy array, the predictions are returned as either a numpy.ndarray or a plain list for hybrid models. diff --git a/mlflow/h2o.py b/mlflow/h2o.py index c795b1700e963e..d9e62cf10ee7f7 100644 --- a/mlflow/h2o.py +++ b/mlflow/h2o.py @@ -10,6 +10,7 @@ import os import warnings import yaml +from typing import Any, Dict, Optional import mlflow from mlflow import pyfunc @@ -269,7 +270,18 @@ class _H2OModelWrapper: def __init__(self, h2o_model): self.h2o_model = h2o_model - def predict(self, dataframe): + def predict( + self, dataframe, params: Optional[Dict[str, Any]] = None + ): # pylint: disable=unused-argument + """ + :param dataframe: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ import h2o predicted = self.h2o_model.predict(h2o.H2OFrame(dataframe)).as_data_frame() diff --git a/mlflow/johnsnowlabs.py b/mlflow/johnsnowlabs.py index 914f9c4941045f..5793b1e2b80d74 100644 --- a/mlflow/johnsnowlabs.py +++ b/mlflow/johnsnowlabs.py @@ -51,6 +51,7 @@ import shutil import sys from pathlib import Path +from typing import Any, Dict, Optional import yaml @@ -838,12 +839,16 @@ def __init__( self.spark = spark or _get_or_create_sparksession() self.spark_model = spark_model - def predict(self, text, output_level=""): + def predict(self, text, params: Optional[Dict[str, Any]] = None): """ Generate predictions given input data in a pandas DataFrame. - :param output_level: :param text: pandas DataFrame containing input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. :return: List with model predictions. """ + output_level = params.get("output_level", "") if params else "" return self.spark_model.predict(text, output_level=output_level).reset_index().to_json() diff --git a/mlflow/langchain/__init__.py b/mlflow/langchain/__init__.py index faa7f3da7171e9..95a5a30d78feb1 100644 --- a/mlflow/langchain/__init__.py +++ b/mlflow/langchain/__init__.py @@ -16,7 +16,7 @@ import shutil import types from packaging import version -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Union import pandas as pd import cloudpickle @@ -638,7 +638,20 @@ class _LangChainModelWrapper: def __init__(self, lc_model): self.lc_model = lc_model - def predict(self, data: Union[pd.DataFrame, List[Union[str, Dict[str, Any]]]]) -> List[str]: + def predict( # pylint: disable=unused-argument + self, + data: Union[pd.DataFrame, List[Union[str, Dict[str, Any]]]], + params: Optional[Dict[str, Any]] = None, # pylint: disable=unused-argument + ) -> List[str]: + """ + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ from mlflow.langchain.api_request_parallel_processor import process_api_requests if isinstance(data, pd.DataFrame): @@ -659,7 +672,18 @@ class _TestLangChainWrapper(_LangChainModelWrapper): A wrapper class that should be used for testing purposes only. """ - def predict(self, data): + def predict( + self, data, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): + """ + :param data: Model input data. 
+ :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ import langchain from tests.langchain.test_langchain_model_export import _mock_async_request diff --git a/mlflow/lightgbm.py b/mlflow/lightgbm.py index 5a50ef3726cfb3..3c970d6ba94afd 100644 --- a/mlflow/lightgbm.py +++ b/mlflow/lightgbm.py @@ -25,6 +25,7 @@ import functools from copy import deepcopy from packaging.version import Version +from typing import Any, Dict, Optional import mlflow from mlflow import pyfunc @@ -464,7 +465,18 @@ class _LGBModelWrapper: def __init__(self, lgb_model): self.lgb_model = lgb_model - def predict(self, dataframe): + def predict( + self, dataframe, params: Optional[Dict[str, Any]] = None + ): # pylint: disable=unused-argument + """ + :param dataframe: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ return self.lgb_model.predict(dataframe) diff --git a/mlflow/ml-package-versions.yml b/mlflow/ml-package-versions.yml index 0717cc9878fcd9..0223b1ba08f121 100644 --- a/mlflow/ml-package-versions.yml +++ b/mlflow/ml-package-versions.yml @@ -513,6 +513,7 @@ transformers: "accelerate", # required for large torch models where weights will not fit in RAM "librosa", # required for transformers audio pipelines for bitrate conversion "ffmpeg", # required for transformers audio pipelines for audio byte to numpy conversion + "sentencepiece", # required for transformers text2text generation pipeline ] # tensorflow 2.13.0 made all Keras private functions inaccessible. transformers versions # prior to 4.30.0 are incompatible with this breaking change in Keras. diff --git a/mlflow/models/evaluation/default_evaluator.py b/mlflow/models/evaluation/default_evaluator.py index 881ed2d4fef3ef..dba94eb3f9d998 100644 --- a/mlflow/models/evaluation/default_evaluator.py +++ b/mlflow/models/evaluation/default_evaluator.py @@ -19,6 +19,7 @@ JsonEvaluationArtifact, ) from mlflow.pyfunc import _ServedPyFuncModel +from mlflow.sklearn import _SklearnModelWrapper from mlflow.utils.proto_json_utils import NumpyEncoder from mlflow.utils.time_utils import get_current_time_millis @@ -79,6 +80,10 @@ def _infer_model_type_by_labels(labels): def _extract_raw_model(model): model_loader_module = model.metadata.flavors["python_function"]["loader_module"] if model_loader_module == "mlflow.sklearn" and not isinstance(model, _ServedPyFuncModel): + # If we load a sklearn model with mlflow.pyfunc.load_model, the model will be wrapped + # with _SklearnModelWrapper, we need to extract the raw model from it. + if isinstance(model._model_impl, _SklearnModelWrapper): + return model_loader_module, model._model_impl.sklearn_model return model_loader_module, model._model_impl else: return model_loader_module, None diff --git a/mlflow/models/model.py b/mlflow/models/model.py index f20f8de027d231..9b66e8ebada56c 100644 --- a/mlflow/models/model.py +++ b/mlflow/models/model.py @@ -297,6 +297,13 @@ def get_output_schema(self): """ return self.signature.outputs if self.signature is not None else None + def get_params_schema(self): + """ + Retrieves the parameters schema of the Model iff the model was saved with a schema + definition. 
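+
+        A short usage sketch (illustrative only; assumes a model previously logged with a
+        params schema, e.g. via ``infer_signature(..., params=...)``)::
+
+            loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)
+            params_schema = loaded_model.metadata.get_params_schema()  # ParamSchema or None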
+        """
+        return getattr(self.signature, "params", None)
+
     def load_input_example(self, path: str):
         """
         Load the input example saved along a model. Returns None if there is no example metadata
diff --git a/mlflow/models/signature.py b/mlflow/models/signature.py
index ec5e659bb2a839..3b12da2dd76153 100644
--- a/mlflow/models/signature.py
+++ b/mlflow/models/signature.py
@@ -23,8 +23,8 @@
 from mlflow.store.artifact.models_artifact_repo import ModelsArtifactRepository
 from mlflow.store.artifact.runs_artifact_repo import RunsArtifactRepository
 from mlflow.tracking.artifact_utils import _download_artifact_from_uri, _upload_artifact_to_uri
-from mlflow.types.schema import Schema
-from mlflow.types.utils import _infer_schema, _infer_schema_from_type_hint
+from mlflow.types.schema import ParamSchema, Schema
+from mlflow.types.utils import _infer_param_schema, _infer_schema, _infer_schema_from_type_hint
 from mlflow.utils.uri import append_to_uri_path
 
@@ -51,14 +51,15 @@
 class ModelSignature:
     """
-    ModelSignature specifies schema of model's inputs and outputs.
+    ModelSignature specifies schema of model's inputs, outputs and params.
 
-    ModelSignature can be :py:func:`inferred <mlflow.models.infer_signature>` from training dataset
-    and model predictions using or constructed by hand by passing an input and output
-    :py:class:`Schema <mlflow.types.Schema>`.
+    ModelSignature can be :py:func:`inferred <mlflow.models.infer_signature>` from a training
+    dataset, model predictions and params for inference, or constructed by hand by
+    passing an input and output :py:class:`Schema <mlflow.types.Schema>`, and params
+    :py:class:`ParamSchema <mlflow.types.ParamSchema>`.
     """
 
-    def __init__(self, inputs: Schema, outputs: Schema = None):
+    def __init__(self, inputs: Schema, outputs: Schema = None, params: ParamSchema = None):
         if not isinstance(inputs, Schema):
             raise TypeError(f"inputs must be mlflow.models.signature.Schema, got '{type(inputs)}'")
         if outputs is not None and not isinstance(outputs, Schema):
@@ -66,8 +67,14 @@
                 "outputs must be either None or mlflow.models.signature.Schema, "
                 "got '{}'".format(type(inputs))
             )
+        if params and not isinstance(params, ParamSchema):
+            raise TypeError(
+                "If params are provided, they must be of type mlflow.models.signature.ParamSchema. "
+                "Got '{}'".format(type(params))
+            )
         self.inputs = inputs
         self.outputs = outputs
+        self.params = params
 
     def to_dict(self) -> Dict[str, Any]:
         """
@@ -82,6 +89,7 @@
         return {
             "inputs": self.inputs.to_json(),
             "outputs": self.outputs.to_json() if self.outputs is not None else None,
+            "params": self.params.to_json() if self.params else None,
         }
 
     @classmethod
@@ -91,22 +99,29 @@
         :param signature_dict: Dictionary representation of model signature.
                                Expected dictionary format:
-                               `{'inputs': <json string>, 'outputs': <json string>" }`
+                               `{'inputs': <json string>,
+                               'outputs': <json string>,
+                               'params': <json string>" }`
 
         :return: ModelSignature populated with the data form the dictionary.
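+
+        An illustrative round trip (``sig`` stands for any existing ``ModelSignature``)::
+
+            assert ModelSignature.from_dict(sig.to_dict()) == sig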
""" inputs = Schema.from_json(signature_dict["inputs"]) if "outputs" in signature_dict and signature_dict["outputs"] is not None: outputs = Schema.from_json(signature_dict["outputs"]) - return cls(inputs, outputs) else: - return cls(inputs) + outputs = None + if (params := signature_dict.get("params")) is not None: + params = ParamSchema.from_json(params) + return cls(inputs, outputs, params) + else: + return cls(inputs, outputs) def __eq__(self, other) -> bool: return ( isinstance(other, ModelSignature) and self.inputs == other.inputs and self.outputs == other.outputs + and self.params == other.params ) def __repr__(self) -> str: @@ -114,18 +129,24 @@ def __repr__(self) -> str: "inputs: \n" " {}\n" "outputs: \n" - " {}\n".format(repr(self.inputs), repr(self.outputs)) + " {}\n" + "params: \n" + " {}\n".format(repr(self.inputs), repr(self.outputs), repr(self.params)) ) def infer_signature( - model_input: Any, model_output: "MlflowInferableDataset" = None + model_input: Any, + model_output: "MlflowInferableDataset" = None, + params: Optional[Dict[str, Any]] = None, ) -> ModelSignature: """ - Infer an MLflow model signature from the training data (input) and model predictions (output). + Infer an MLflow model signature from the training data (input), model predictions (output) + and parameters (for inference). The signature represents model input and output as data frames with (optionally) named columns - and data type specified as one of types defined in :py:class:`mlflow.types.DataType`. + and data type specified as one of types defined in :py:class:`mlflow.types.DataType`. It also + includes parameters schema for inference, . This method will raise an exception if the user data contains incompatible types or is not passed in one of the supported formats listed below. @@ -147,11 +168,53 @@ def infer_signature( :param model_input: Valid input to the model. E.g. (a subset of) the training dataset. :param model_output: Valid model output. E.g. Model predictions for the (subset of) training dataset. + :param params: Valid parameters for inference. It should be a dictionary of parameters + that can be set on the model during inference by passing `params` to pyfunc + `predict` method. + + An example of valid parameters: + + .. code-block:: python + + from mlflow.models import infer_signature + from mlflow.transformers import generate_signature_output + + # Define parameters for inference + params = { + "num_beams": 5, + "max_length": 30, + "do_sample": True, + "remove_invalid_values": True, + } + + # Infer the signature including parameters + signature = infer_signature( + data, + generate_signature_output(model, data), + params=params, + ) + + # Saving model with model signature + mlflow.transformers.save_model( + model, + path=model_path, + signature=signature, + ) + + pyfunc_loaded = mlflow.pyfunc.load_model(model_path) + + # Passing params to `predict` function directly + result = pyfunc_loaded.predict(data, params=params) + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. 
+
     :return: ModelSignature
     """
     inputs = _infer_schema(model_input)
     outputs = _infer_schema(model_output) if model_output is not None else None
-    return ModelSignature(inputs, outputs)
+    params = _infer_param_schema(params) if params else None
+    return ModelSignature(inputs, outputs, params)
 
 
 # `t\w*\.` matches the `typing` module or its alias
diff --git a/mlflow/models/utils.py b/mlflow/models/utils.py
index d35a997d946921..86e6f0664c4bd7 100644
--- a/mlflow/models/utils.py
+++ b/mlflow/models/utils.py
@@ -1,7 +1,8 @@
 import decimal
 import json
+import logging
 import os
-from typing import Union, Any, Dict, List
+from typing import Union, Any, Dict, List, Optional
 
 import numpy as np
 import pandas as pd
@@ -9,7 +10,7 @@
 from mlflow.exceptions import MlflowException, INVALID_PARAMETER_VALUE
 from mlflow.models import Model
 from mlflow.store.artifact.utils.models import get_model_name_and_version
-from mlflow.types import DataType, Schema, TensorSpec
+from mlflow.types import DataType, ParamSchema, Schema, TensorSpec, ParamSpec
 from mlflow.types.utils import TensorsNotSupportedException, clean_tensor_type
 from mlflow.utils.annotations import experimental
 from mlflow.utils.proto_json_utils import (
@@ -35,6 +36,8 @@
 ]
 PyFuncOutput = Union[pd.DataFrame, pd.Series, np.ndarray, list, str]
 
+_logger = logging.getLogger(__name__)
+
 
 class _Example:
     """
@@ -884,3 +887,59 @@
     (name, version) = get_model_name_and_version(client, model_uri)
     model_version = client.get_model_version(name, version)
     return model_version
+
+
+def _enforce_params_schema(params: Optional[Dict[str, Any]], schema: Optional[ParamSchema]):
+    if schema is None:
+        if params in [None, {}]:
+            return params
+        raise MlflowException.invalid_parameter_value(
+            "`params` can only be specified at inference time if the model signature "
+            "defines a params schema. This model does not define a params schema.",
+        )
+    params = {} if params is None else params
+    if not isinstance(params, dict):
+        raise MlflowException.invalid_parameter_value(
+            f"Parameters must be a dictionary. Got type '{type(params).__name__}'.",
+        )
+    if not isinstance(schema, ParamSchema):
+        raise MlflowException.invalid_parameter_value(
+            "Parameters schema must be an instance of ParamSchema. "
+            f"Got type '{type(schema).__name__}'.",
+        )
+    if any(not isinstance(k, str) for k in params.keys()):
+        _logger.warning(
+            "Keys in parameters should be of type `str`, but received non-string keys. "
+            "Converting all keys to string..."
+        )
+        params = {str(k): v for k, v in params.items()}
+
+    allowed_keys = {param.name for param in schema.params}
+    ignored_keys = set(params) - allowed_keys
+    if ignored_keys:
+        _logger.warning(
+            f"Unrecognized params {list(ignored_keys)} are ignored for inference. "
+            f"Supported params are: {allowed_keys}. "
+            "To enable them, please add corresponding schema in ModelSignature."
+ ) + + params = {k: params[k] for k in params if k in allowed_keys} + + invalid_params = set() + for param_spec in schema.params: + if param_spec.name in params: + try: + params[param_spec.name] = ParamSpec.validate_param_spec( + params[param_spec.name], param_spec + ) + except MlflowException as e: + invalid_params.add((param_spec.name, e.message)) + else: + params[param_spec.name] = param_spec.default + + if invalid_params: + raise MlflowException.invalid_parameter_value( + f"Invalid parameters found: {invalid_params!r}", + ) + + return params diff --git a/mlflow/onnx.py b/mlflow/onnx.py index 4ab7f8070e981d..822780b3388708 100644 --- a/mlflow/onnx.py +++ b/mlflow/onnx.py @@ -12,6 +12,7 @@ import numpy as np from pathlib import Path from packaging.version import Version +from typing import Any, Dict, Optional import pandas as pd @@ -316,7 +317,9 @@ def _cast_float64_to_float32(self, feeds): feeds[input_name] = feed.astype(np.float32) return feeds - def predict(self, data): + def predict( + self, data, params: Optional[Dict[str, Any]] = None + ): # pylint: disable=unused-argument """ :param data: Either a pandas DataFrame, numpy.ndarray or a dictionary. @@ -336,6 +339,10 @@ def predict(self, data): For more information about the ONNX Runtime, see ``_. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. :return: Model predictions. If the input is a pandas.DataFrame, the predictions are returned in a pandas.DataFrame. If the input is a numpy array or a dictionary the predictions are returned in a dictionary. diff --git a/mlflow/openai/__init__.py b/mlflow/openai/__init__.py index f5a1e816422c64..97f92f8039acbe 100644 --- a/mlflow/openai/__init__.py +++ b/mlflow/openai/__init__.py @@ -31,6 +31,7 @@ from enum import Enum from string import Formatter import itertools +from typing import Any, Dict, Optional import mlflow from mlflow import pyfunc @@ -525,7 +526,18 @@ def get_params_list(self, data): else: return data[self.variables].to_dict(orient="records") - def predict(self, data): + def predict( + self, data, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): + """ + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ from mlflow.openai.api_request_parallel_processor import process_api_requests if self.variables: @@ -559,7 +571,18 @@ class _TestOpenAIWrapper(_OpenAIWrapper): A wrapper class that should be used for testing purposes only. """ - def predict(self, data): + def predict( + self, data, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): + """ + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. 
+ """ from mlflow.openai.utils import _mock_chat_completion_request with _mock_chat_completion_request(): diff --git a/mlflow/paddle/__init__.py b/mlflow/paddle/__init__.py index 5eaa333066728c..db9499755e3597 100644 --- a/mlflow/paddle/__init__.py +++ b/mlflow/paddle/__init__.py @@ -14,6 +14,7 @@ import os import logging import yaml +from typing import Any, Dict, Optional import mlflow from mlflow import pyfunc @@ -447,7 +448,18 @@ class _PaddleWrapper: def __init__(self, pd_model): self.pd_model = pd_model - def predict(self, data): + def predict( + self, data, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): + """ + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ import pandas as pd import numpy as np diff --git a/mlflow/pmdarima.py b/mlflow/pmdarima.py index 508c3107585ab0..fa6561e00bdce8 100644 --- a/mlflow/pmdarima.py +++ b/mlflow/pmdarima.py @@ -18,6 +18,7 @@ import pandas as pd import yaml from packaging.version import Version +from typing import Any, Dict, Optional import mlflow from mlflow import pyfunc @@ -342,7 +343,18 @@ def __init__(self, pmdarima_model): self.pmdarima_model = pmdarima_model self._pmdarima_version = pmdarima.__version__ - def predict(self, dataframe) -> pd.DataFrame: + def predict( + self, dataframe, params: Optional[Dict[str, Any]] = None + ) -> pd.DataFrame: # pylint: disable=unused-argument + """ + :param dataframe: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ df_schema = dataframe.columns.values.tolist() if len(dataframe) > 1: diff --git a/mlflow/prophet.py b/mlflow/prophet.py index 336b64a3284ecb..3a77418f3125d0 100644 --- a/mlflow/prophet.py +++ b/mlflow/prophet.py @@ -14,6 +14,7 @@ import os import yaml import json +from typing import Any, Dict, Optional import mlflow from mlflow import pyfunc @@ -336,5 +337,16 @@ class _ProphetModelWrapper: def __init__(self, pr_model): self.pr_model = pr_model - def predict(self, dataframe): + def predict( + self, dataframe, params: Optional[Dict[str, Any]] = None + ): # pylint: disable=unused-argument + """ + :param dataframe: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. 
+        """
+        return self.pr_model.predict(dataframe)
diff --git a/mlflow/pyfunc/__init__.py b/mlflow/pyfunc/__init__.py
index 79268b1ac5a805..69dd0394693461 100644
--- a/mlflow/pyfunc/__init__.py
+++ b/mlflow/pyfunc/__init__.py
@@ -219,7 +219,7 @@
 import inspect
 import functools
 from copy import deepcopy
-from typing import Any, Union, Iterator, Tuple
+from typing import Any, Dict, Optional, Union, Iterator, Tuple
 
 import numpy as np
 import pandas
@@ -236,6 +237,7 @@
 from mlflow.models.utils import (
     PyFuncInput,
     PyFuncOutput,
+    _enforce_params_schema,
     _enforce_schema,
     _save_example,
 )
@@ -248,7 +249,10 @@
     PythonModelContext,
     get_default_conda_env,
 )
-from mlflow.pyfunc.model import get_default_pip_requirements
+from mlflow.pyfunc.model import (
+    get_default_pip_requirements,
+    _log_warning_if_params_not_in_predict_signature,
+)
 from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS
 from mlflow.tracking.artifact_utils import _download_artifact_from_uri
 from mlflow.utils import (
@@ -364,6 +368,19 @@ def _load_model_env(path):
     return _get_flavor_configuration(model_path=path, flavor_name=FLAVOR_NAME).get(ENV, None)
 
 
+def _validate_params(params, model_metadata):
+    if hasattr(model_metadata, "get_params_schema"):
+        params_schema = model_metadata.get_params_schema()
+        return _enforce_params_schema(params, params_schema)
+    if params:
+        raise MlflowException.invalid_parameter_value(
+            "This model was not logged with a params schema and does not support "
+            "providing the params argument. "
+            "Please log the model with mlflow >= 2.6.0 and specify a params schema.",
+        )
+    return
+
+
 class PyFuncModel:
     """
     MLflow 'python function' model.
@@ -388,7 +405,7 @@ def __init__(self, model_meta: Model, model_impl: Any, predict_fn: str = "predic
         self._model_impl = model_impl
         self._predict_fn = getattr(model_impl, predict_fn)
 
-    def predict(self, data: PyFuncInput) -> PyFuncOutput:
+    def predict(self, data: PyFuncInput, params: Optional[Dict[str, Any]] = None) -> PyFuncOutput:
         """
         Generate model predictions.
 
@@ -409,23 +426,39 @@
                       (i.e. read / write the elements using C-like index order), and DataFrame
                       column values will be cast as the required tensor spec type.
 
+        :param params: Additional parameters to pass to the model for inference.
+
+            .. Note:: Experimental: This parameter may change or be removed in a future
+                release without warning.
+
         :return: Model predictions as one of pandas.DataFrame, pandas.Series, numpy.ndarray or list.
         """
         input_schema = self.metadata.get_input_schema()
         if input_schema is not None:
             data = _enforce_schema(data, input_schema)
 
+        params = _validate_params(params, self.metadata)
+
+        def _predict():
+            # Models saved prior to MLflow 2.5.0 do not support `params` in the pyfunc `predict()`
+            # function definition, nor do they support `**kwargs`.
Accordingly, we only pass + # `params` to the `predict()` method if it defines the `params` argument + if inspect.signature(self._predict_fn).parameters.get("params"): + return self._predict_fn(data, params=params) + _log_warning_if_params_not_in_predict_signature(_logger, params) + return self._predict_fn(data) + if "openai" in sys.modules and MLFLOW_OPENAI_RETRIES_ENABLED.get(): from mlflow.openai.retry import openai_auto_retry_patch try: with openai_auto_retry_patch(): - return self._predict_fn(data) + return _predict() except Exception: if _MLFLOW_TESTING.get(): raise - return self._predict_fn(data) + return _predict() @experimental def unwrap_python_model(self): @@ -445,10 +478,10 @@ def unwrap_python_model(self): # define a custom model class MyModel(mlflow.pyfunc.PythonModel): - def predict(self, context, model_input): - return self.my_custom_function(model_input) + def predict(self, context, model_input, params=None): + return self.my_custom_function(model_input, params) - def my_custom_function(self, model_input): + def my_custom_function(self, model_input, params=None): # do something with the model input return 0 @@ -605,8 +638,21 @@ def __init__(self, model_meta: Model, client: Any, server_pid: int): self._client = client self._server_pid = server_pid - def predict(self, data): - result = self._client.invoke(data).get_predictions() + def predict(self, data, params=None): + """ + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ + if inspect.signature(self._client.invoke).parameters.get("params"): + result = self._client.invoke(data, params=params).get_predictions() + else: + _log_warning_if_params_not_in_predict_signature(_logger, params) + result = self._client.invoke(data).get_predictions() if isinstance(result, pandas.DataFrame): result = result[result.columns[0]] return result @@ -980,7 +1026,13 @@ def _check_udf_return_type(data_type): return False -def spark_udf(spark, model_uri, result_type=None, env_manager=_EnvManager.LOCAL): +def spark_udf( + spark, + model_uri, + result_type=None, + env_manager=_EnvManager.LOCAL, + params: Optional[Dict[str, Any]] = None, +): """ A Spark UDF that can be used to invoke the Python function formatted model. @@ -1084,6 +1136,11 @@ def spark_udf(spark, model_uri, result_type=None, env_manager=_EnvManager.LOCAL) may differ from the environment used to train the model and may lead to errors or invalid predictions. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + :return: Spark UDF that applies the model's ``predict`` method to the data and returns a type specified by ``result_type``, which by default is a double. """ @@ -1206,6 +1263,7 @@ def spark_udf(spark, model_uri, result_type=None, env_manager=_EnvManager.LOCAL) e.g., struct>. 
""" ) + params = _validate_params(params, model_metadata) def _predict_row_batch(predict_fn, args): input_schema = model_metadata.get_input_schema() @@ -1237,7 +1295,7 @@ def _predict_row_batch(predict_fn, args): ) pdf = pandas.DataFrame(data={names[i]: x for i, x in enumerate(args)}, columns=names) - result = predict_fn(pdf) + result = predict_fn(pdf, params) if isinstance(result, dict): result = {k: list(v) for k, v in result.items()} @@ -1407,8 +1465,8 @@ def server_redirect_log_thread_func(child_stdout): server_redirect_log_thread = threading.Thread( target=server_redirect_log_thread_func, args=(scoring_server_proc.stdout,), + daemon=True, ) - server_redirect_log_thread.setDaemon(True) server_redirect_log_thread.start() try: @@ -1425,7 +1483,10 @@ def server_redirect_log_thread_func(child_stdout): err_msg += "".join(server_tail_logs) raise MlflowException(err_msg) from e - def batch_predict_fn(pdf): + def batch_predict_fn(pdf, params=None): + if inspect.signature(client.invoke).parameters.get("params"): + return client.invoke(pdf, params=params).get_predictions() + _log_warning_if_params_not_in_predict_signature(_logger, params) return client.invoke(pdf).get_predictions() elif env_manager == _EnvManager.LOCAL: @@ -1434,7 +1495,10 @@ def batch_predict_fn(pdf): else: loaded_model = mlflow.pyfunc.load_model(local_model_path) - def batch_predict_fn(pdf): + def batch_predict_fn(pdf, params=None): + if inspect.signature(loaded_model.predict).parameters.get("params"): + return loaded_model.predict(pdf, params=params) + _log_warning_if_params_not_in_predict_signature(_logger, params) return loaded_model.predict(pdf) try: @@ -1585,7 +1649,7 @@ def save_model( class MyModel(mlflow.pyfunc.PythonModel): - def predict(self, context, model_input: List[str]) -> List[str]: + def predict(self, context, model_input: List[str], params=None) -> List[str]: return [i.upper() for i in model_input] @@ -1838,7 +1902,7 @@ def log_model( class MyModel(mlflow.pyfunc.PythonModel): - def predict(self, context, model_input: List[str]) -> List[str]: + def predict(self, context, model_input: List[str], params=None) -> List[str]: return [i.upper() for i in model_input] diff --git a/mlflow/pyfunc/model.py b/mlflow/pyfunc/model.py index 51bc9e86f412b4..5fded693152862 100644 --- a/mlflow/pyfunc/model.py +++ b/mlflow/pyfunc/model.py @@ -3,11 +3,13 @@ models with a user-defined ``PythonModel`` subclass. """ +import inspect +import logging import os import posixpath import shutil import yaml -from typing import Dict, List +from typing import Any, Dict, List, Optional from abc import ABCMeta, abstractmethod import cloudpickle @@ -41,6 +43,9 @@ CONFIG_KEY_CLOUDPICKLE_VERSION = "cloudpickle_version" +_logger = logging.getLogger(__name__) + + def get_default_pip_requirements(): """ :return: A list of default pip requirements for MLflow Models produced by this flavor. @@ -60,6 +65,14 @@ def get_default_conda_env(): return _mlflow_conda_env(additional_pip_deps=get_default_pip_requirements()) +def _log_warning_if_params_not_in_predict_signature(logger, params): + if params: + logger.warning( + "The underlying model does not support passing additional parameters to the predict" + f" function. `params` {params} will be ignored." + ) + + class PythonModel: """ Represents a generic Python model that evaluates inputs and produces API-compatible outputs. 
@@ -89,7 +102,7 @@ def _get_type_hints(self): return _extract_type_hints(self.predict, input_arg_index=1) @abstractmethod - def predict(self, context, model_input): + def predict(self, context, model_input, params: Optional[Dict[str, Any]] = None): """ Evaluates a pyfunc-compatible input and produces a pyfunc-compatible output. For more information about the pyfunc input/output API, see the :ref:`pyfunc-inference-api`. @@ -97,6 +110,10 @@ def predict(self, context, model_input): :param context: A :class:`~PythonModelContext` instance containing artifacts that the model can use to perform inference. :param model_input: A pyfunc-compatible input for the model to evaluate. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. """ @@ -114,7 +131,26 @@ def __init__(self, func, hints=None, signature=None): def _get_type_hints(self): return _extract_type_hints(self.func, input_arg_index=0) - def predict(self, context, model_input): + def predict( + self, + context, # pylint: disable=unused-argument + model_input, + params: Optional[Dict[str, Any]] = None, + ): + """ + :param context: A :class:`~PythonModelContext` instance containing artifacts that the model + can use to perform inference. + :param model_input: A pyfunc-compatible input for the model to evaluate. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ + if inspect.signature(self.func).parameters.get("params"): + return self.func(model_input, params=params) + _log_warning_if_params_not_in_predict_signature(_logger, params) return self.func(model_input) @@ -361,5 +397,19 @@ def _convert_input(self, model_input): return model_input - def predict(self, model_input): + def predict(self, model_input, params: Optional[Dict[str, Any]] = None): + """ + :param model_input: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ + if inspect.signature(self.python_model.predict).parameters.get("params"): + return self.python_model.predict( + self.context, self._convert_input(model_input), params=params + ) + _log_warning_if_params_not_in_predict_signature(_logger, params) return self.python_model.predict(self.context, self._convert_input(model_input)) diff --git a/mlflow/pyfunc/scoring_server/__init__.py b/mlflow/pyfunc/scoring_server/__init__.py index 159704eea0428c..985918f306540c 100644 --- a/mlflow/pyfunc/scoring_server/__init__.py +++ b/mlflow/pyfunc/scoring_server/__init__.py @@ -12,8 +12,9 @@ /version used for getting the mlflow version /invocations used for scoring """ -from typing import Tuple, Dict +from typing import Dict, NamedTuple, Tuple import flask +import inspect import json import logging import os @@ -29,8 +30,10 @@ # dependencies to the minimum here. # ALl of the mlflow dependencies below need to be backwards compatible. 
 from mlflow.exceptions import MlflowException
+from mlflow.pyfunc.model import _log_warning_if_params_not_in_predict_signature
 from mlflow.types import Schema
 from mlflow.utils import reraise
+from mlflow.utils.annotations import deprecated
 from mlflow.utils.file_utils import path_to_local_file_uri
 from mlflow.utils.os import is_windows
 from mlflow.utils.proto_json_utils import (
@@ -84,6 +87,9 @@
 )
 
 
+# Keep this method to maintain compatibility with MLServer
+# https://github.com/SeldonIO/MLServer/blob/caa173ab099a4ec002a7c252cbcc511646c261a6/runtimes/mlflow/mlserver_mlflow/runtime.py#L13C5-L13C31
+@deprecated("infer_and_parse_data", "2.6.0")
 def infer_and_parse_json_input(json_input, schema: Schema = None):
     """
     :param json_input: A JSON-formatted string representation of TF serving input or a Pandas
                        DataFrame, or a stream containing such a string representation.
@@ -134,6 +140,72 @@
     )
 
 
+def _decode_json_input(json_input):
+    """
+    :param json_input: A JSON-formatted string representation of TF serving input or a Pandas
+                       DataFrame, or a stream containing such a string representation.
+    :return: A dictionary representation of the JSON input.
+    """
+    if isinstance(json_input, dict):
+        return json_input
+
+    try:
+        decoded_input = json.loads(json_input)
+    except json.decoder.JSONDecodeError as ex:
+        raise MlflowException(
+            message=(
+                "Failed to parse input from JSON. Ensure that input is a valid JSON"
+                f" formatted string. Input: \n{json_input}\n"
+            ),
+            error_code=BAD_REQUEST,
+        ) from ex
+
+    if isinstance(decoded_input, dict):
+        return decoded_input
+    if isinstance(decoded_input, list):
+        raise MlflowException(
+            message=f"{REQUIRED_INPUT_FORMAT}. Received a list. {SCORING_PROTOCOL_CHANGE_INFO}",
+            error_code=BAD_REQUEST,
+        )
+
+    raise MlflowException(
+        message=f"{REQUIRED_INPUT_FORMAT}. Received unexpected input type '{type(decoded_input)}'.",
+        error_code=BAD_REQUEST,
+    )
+
+
+def _split_data_and_params(json_input):
+    input_dict = _decode_json_input(json_input)
+    data = {k: v for k, v in input_dict.items() if k in SUPPORTED_FORMATS}
+    params = input_dict.pop("params", None)
+    return data, params
+
+
+def infer_and_parse_data(data, schema: Schema = None):
+    """
+    :param data: A dictionary containing TF serving input or a JSON-decoded Pandas DataFrame
+                 payload, keyed by one of the supported input formats.
+    :param schema: Optional schema specification to be used during parsing.
+    """
+
+    format_keys = set(data.keys()).intersection(SUPPORTED_FORMATS)
+    if len(format_keys) != 1:
+        message = f"Received dictionary with input fields: {list(data.keys())}"
+        raise MlflowException(
+            message=f"{REQUIRED_INPUT_FORMAT}. {message}. {SCORING_PROTOCOL_CHANGE_INFO}",
+            error_code=BAD_REQUEST,
+        )
+    input_format = format_keys.pop()
+    if input_format in (INSTANCES, INPUTS):
+        return parse_tf_serving_input(data, schema=schema)
+
+    if input_format in (DF_SPLIT, DF_RECORDS):
+        pandas_orient = input_format[10:]  # skip the dataframe_ prefix
+        return dataframe_from_parsed_json(
+            data[input_format], pandas_orient=pandas_orient, schema=schema
+        )
+
+
 def parse_csv_input(csv_input, schema: Schema = None):
     """
     :param csv_input: A CSV-formatted string representation of a Pandas DataFrame, or a stream
@@ -189,6 +261,82 @@ def _handle_serving_error(error_message, error_code, include_traceback=True):
         reraise(MlflowException, e)
 
 
+class InvocationsResponse(NamedTuple):
+    response: str
+    status: int
+    mimetype: str
+
+
+def invocations(data, content_type, model, input_schema):
+    type_parts = list(map(str.strip, content_type.split(";")))
+    mime_type = type_parts[0]
+    parameter_value_pairs = type_parts[1:]
+    parameter_values = {
+        key: value for pair in parameter_value_pairs for key, _, value in [pair.partition("=")]
+    }
+
+    charset = parameter_values.get("charset", "utf-8").lower()
+    if charset != "utf-8":
+        return InvocationsResponse(
+            response="The scoring server only supports UTF-8",
+            status=415,
+            mimetype="text/plain",
+        )
+
+    unexpected_content_parameters = set(parameter_values.keys()).difference({"charset"})
+    if unexpected_content_parameters:
+        return InvocationsResponse(
+            response=(
+                f"Unrecognized content type parameters: "
+                f"{', '.join(unexpected_content_parameters)}. "
+                f"{SCORING_PROTOCOL_CHANGE_INFO}"
+            ),
+            status=415,
+            mimetype="text/plain",
+        )
+    # Convert from CSV to pandas
+    if mime_type == CONTENT_TYPE_CSV:
+        csv_input = StringIO(data)
+        data = parse_csv_input(csv_input=csv_input, schema=input_schema)
+        params = None
+    elif mime_type == CONTENT_TYPE_JSON:
+        data, params = _split_data_and_params(data)
+        data = infer_and_parse_data(data, input_schema)
+    else:
+        return InvocationsResponse(
+            response=(
+                "This predictor only supports the following content types:"
+                f" Types: {CONTENT_TYPES}."
+                f" Got '{content_type}'."
+            ),
+            status=415,
+            mimetype="text/plain",
+        )
+
+    # Do the prediction
+    try:
+        if inspect.signature(model.predict).parameters.get("params"):
+            raw_predictions = model.predict(data, params=params)
+        else:
+            _log_warning_if_params_not_in_predict_signature(_logger, params)
+            raw_predictions = model.predict(data)
+    except MlflowException as e:
+        raise e
+    except Exception:
+        raise MlflowException(
+            message=(
+                "Encountered an unexpected error while evaluating the model. Verify"
+                " that the serialized input Dataframe is compatible with the model for"
+                " inference."
+            ),
+            error_code=BAD_REQUEST,
+            stack_trace=traceback.format_exc(),
+        )
+    result = StringIO()
+    predictions_to_json(raw_predictions, result)
+    return InvocationsResponse(response=result.getvalue(), status=200, mimetype="application/json")
+
+
 def init(model: PyFuncModel):
     """
     Initialize the server. Loads pyfunc model from the path.
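With ``_split_data_and_params`` and the refactored ``invocations`` in place, a JSON scoring request carries its inputs under exactly one of the ``SUPPORTED_FORMATS`` keys plus an optional top-level ``"params"`` object; because ``"params"`` is not an input-format key, it is filtered out of the data dictionary and handed to the model separately. A request against a locally served model might look like the following sketch (host, port, and parameter values are illustrative):

.. code-block:: python

    import json

    import requests

    payload = {
        # One of the supported input keys, e.g. "dataframe_split" or "inputs".
        "dataframe_split": {"columns": ["text"], "data": [["hello"], ["world"]]},
        # Split out by _split_data_and_params and forwarded to model.predict
        # only when its signature declares a `params` argument.
        "params": {"temperature": 0.2, "max_length": 64},
    }
    response = requests.post(
        "http://127.0.0.1:5000/invocations",
        data=json.dumps(payload),
        headers={"Content-Type": "application/json"},
    )
    print(response.json())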
@@ -226,71 +374,13 @@ def transformation(): # Content-Type can include other attributes like CHARSET # Content-type RFC: https://datatracker.ietf.org/doc/html/rfc2045#section-5.1 # TODO: Suport ";" in quoted parameter values - type_parts = flask.request.content_type.split(";") - type_parts = list(map(str.strip, type_parts)) - mime_type = type_parts[0] - parameter_value_pairs = type_parts[1:] - parameter_values = {} - for parameter_value_pair in parameter_value_pairs: - (key, _, value) = parameter_value_pair.partition("=") - parameter_values[key] = value - - charset = parameter_values.get("charset", "utf-8").lower() - if charset != "utf-8": - return flask.Response( - response="The scoring server only supports UTF-8", - status=415, - mimetype="text/plain", - ) + data = flask.request.data.decode("utf-8") + content_type = flask.request.content_type + result = invocations(data, content_type, model, input_schema) - unexpected_content_parameters = set(parameter_values.keys()).difference({"charset"}) - if unexpected_content_parameters: - return flask.Response( - response=( - f"Unrecognized content type parameters: " - f"{', '.join(unexpected_content_parameters)}. " - f"{SCORING_PROTOCOL_CHANGE_INFO}" - ), - status=415, - mimetype="text/plain", - ) - # Convert from CSV to pandas - if mime_type == CONTENT_TYPE_CSV: - data = flask.request.data.decode("utf-8") - csv_input = StringIO(data) - data = parse_csv_input(csv_input=csv_input, schema=input_schema) - elif mime_type == CONTENT_TYPE_JSON: - json_str = flask.request.data.decode("utf-8") - data = infer_and_parse_json_input(json_str, input_schema) - else: - return flask.Response( - response=( - "This predictor only supports the following content types:" - f" Types: {CONTENT_TYPES}." - f" Got '{flask.request.content_type}'." - ), - status=415, - mimetype="text/plain", - ) - - # Do the prediction - try: - raw_predictions = model.predict(data) - except MlflowException as e: - raise e - except Exception: - raise MlflowException( - message=( - "Encountered an unexpected error while evaluating the model. Verify" - " that the serialized input Dataframe is compatible with the model for" - " inference." 
- ), - error_code=BAD_REQUEST, - stack_trace=traceback.format_exc(), - ) - result = StringIO() - predictions_to_json(raw_predictions, result) - return flask.Response(response=result.getvalue(), status=200, mimetype="application/json") + return flask.Response( + response=result.response, status=result.status, mimetype=result.mimetype + ) return app @@ -304,20 +394,28 @@ def _predict(model_uri, input_path, output_path, content_type): else: with open(input_path) as f: input_str = f.read() - df = infer_and_parse_json_input(input_str) + data, params = _split_data_and_params(input_str) + df = infer_and_parse_data(data) elif content_type == "csv": if input_path is not None: df = parse_csv_input(input_path) else: df = parse_csv_input(sys.stdin) + params = None else: raise Exception(f"Unknown content type '{content_type}'") + if inspect.signature(pyfunc_model.predict).parameters.get("params"): + raw_predictions = pyfunc_model.predict(df, params=params) + else: + _log_warning_if_params_not_in_predict_signature(_logger, params) + raw_predictions = pyfunc_model.predict(df) + if output_path is None: - predictions_to_json(pyfunc_model.predict(df), sys.stdout) + predictions_to_json(raw_predictions, sys.stdout) else: with open(output_path, "w") as fout: - predictions_to_json(pyfunc_model.predict(df), fout) + predictions_to_json(raw_predictions, fout) def _serve(model_uri, port, host): diff --git a/mlflow/pyfunc/scoring_server/client.py b/mlflow/pyfunc/scoring_server/client.py index caef6dd7d93105..1a846609bfad3f 100644 --- a/mlflow/pyfunc/scoring_server/client.py +++ b/mlflow/pyfunc/scoring_server/client.py @@ -6,6 +6,7 @@ import uuid from pathlib import Path from abc import ABC, abstractmethod +from typing import Any, Dict, Optional from mlflow.pyfunc import scoring_server @@ -25,10 +26,16 @@ def wait_server_ready(self, timeout=30, scoring_server_proc=None): """ @abstractmethod - def invoke(self, data): + def invoke(self, data, params: Optional[Dict[str, Any]] = None): """ Invoke inference on input data. The input data must be pandas dataframe or numpy array or a dict of numpy arrays. + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + :return: Prediction result. """ @@ -65,10 +72,18 @@ def wait_server_ready(self, timeout=30, scoring_server_proc=None): raise RuntimeError(f"Server process already exit with returncode {return_code}") raise RuntimeError("Wait scoring server ready timeout.") - def invoke(self, data): + def invoke(self, data, params: Optional[Dict[str, Any]] = None): + """ + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + :return: :py:class:`PredictionsResponse ` result. 
+ """ response = requests.post( url=self.url_prefix + "/invocations", - data=dump_input_data(data), + data=dump_input_data(data, params=params), headers={"Content-Type": scoring_server.CONTENT_TYPE_JSON}, ) if response.status_code != 200: @@ -80,6 +95,7 @@ def invoke(self, data): class StdinScoringServerClient(BaseScoringServerClient): def __init__(self, process): + super().__init__() self.process = process self.tmpdir = Path(tempfile.mkdtemp()) self.output_json = self.tmpdir.joinpath("output.json") @@ -89,10 +105,17 @@ def wait_server_ready(self, timeout=30, scoring_server_proc=None): if return_code is not None: raise RuntimeError(f"Server process already exit with returncode {return_code}") - def invoke(self, data): + def invoke(self, data, params: Optional[Dict[str, Any]] = None): """ Invoke inference on input data. The input data must be pandas dataframe or numpy array or a dict of numpy arrays. + + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + :return: :py:class:`PredictionsResponse ` result. """ if not self.output_json.exists(): self.output_json.touch() @@ -100,7 +123,7 @@ def invoke(self, data): request_id = str(uuid.uuid4()) request = { "id": request_id, - "data": dump_input_data(data), + "data": dump_input_data(data, params=params), "output_file": str(self.output_json), } self.process.stdin.write(json.dumps(request) + "\n") diff --git a/mlflow/pyfunc/stdin_server.py b/mlflow/pyfunc/stdin_server.py index 9d5315b8bc4cdd..1b3a5a00af75e9 100644 --- a/mlflow/pyfunc/stdin_server.py +++ b/mlflow/pyfunc/stdin_server.py @@ -1,10 +1,12 @@ import argparse import sys +import inspect import json import logging import mlflow from mlflow.pyfunc import scoring_server +from mlflow.pyfunc.model import _log_warning_if_params_not_in_predict_signature _logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -25,10 +27,15 @@ _logger.info("Parsing input data") data = request["data"] - data = scoring_server.infer_and_parse_json_input(data, input_schema) + data, params = scoring_server._split_data_and_params(data) + data = scoring_server.infer_and_parse_data(data, input_schema) _logger.info("Making predictions") - preds = model.predict(data) + if inspect.signature(model.predict).parameters.get("params"): + preds = model.predict(data, params=params) + else: + _log_warning_if_params_not_in_predict_signature(_logger, params) + preds = model.predict(data) _logger.info("Writing predictions") with open(request["output_file"], "a") as f: diff --git a/mlflow/pytorch/__init__.py b/mlflow/pytorch/__init__.py index 11e7e16817a577..de28e52c7e44fc 100644 --- a/mlflow/pytorch/__init__.py +++ b/mlflow/pytorch/__init__.py @@ -13,6 +13,7 @@ import yaml import warnings import atexit +from typing import Any, Dict, Optional import numpy as np import pandas as pd @@ -727,9 +728,19 @@ class _PyTorchWrapper: def __init__(self, pytorch_model): self.pytorch_model = pytorch_model - def predict(self, data, device=None): + def predict(self, data, params: Optional[Dict[str, Any]] = None): + """ + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. 
+ """ import torch + device = params.get("device", None) if params else None # if CUDA is available, we use the default CUDA device. # To force inference to the CPU when the GPU is available, please set # MLFLOW_DEFAULT_PREDICTION_DEVICE to "cpu" diff --git a/mlflow/recipes/utils/wrapped_recipe_model.py b/mlflow/recipes/utils/wrapped_recipe_model.py index 62f7ab57ff39d8..24f19fa5c6c1c8 100644 --- a/mlflow/recipes/utils/wrapped_recipe_model.py +++ b/mlflow/recipes/utils/wrapped_recipe_model.py @@ -2,6 +2,7 @@ from mlflow.pyfunc import PythonModel import pandas as pd import numpy as np +from typing import Any, Dict, Optional class WrappedRecipeModel(PythonModel): @@ -16,7 +17,23 @@ def __init__( def load_context(self, context): self._classifier = mlflow.sklearn.load_model(context.artifacts["model_path"]) - def predict(self, context, model_input): + def predict( + self, + context, + model_input, + params: Optional[Dict[str, Any]] = None, # pylint: disable=unused-argument + ): + """ + :param context: A :class:`~PythonModelContext` instance containing artifacts that the model + can use to perform inference. + :param model_input: A pyfunc-compatible input for the model to evaluate. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ predicted_label = self._classifier.predict(model_input) # Only classification recipe would be have multiple classes in the target column # So if it doesn't have multiple classes, return back the predicted_label diff --git a/mlflow/sagemaker/__init__.py b/mlflow/sagemaker/__init__.py index 89fa8db0d6373e..14cf11394a0eb1 100644 --- a/mlflow/sagemaker/__init__.py +++ b/mlflow/sagemaker/__init__.py @@ -11,6 +11,7 @@ import platform import json import signal +from typing import Any, Dict, Optional import mlflow import mlflow.version @@ -2659,7 +2660,13 @@ def get_deployment(self, name, endpoint=None): message=f"There was an error while retrieving the deployment: {exc}\n" ) - def predict(self, deployment_name=None, inputs=None, endpoint=None): + def predict( + self, + deployment_name=None, + inputs=None, + endpoint=None, # pylint: disable=unused-argument + params: Optional[Dict[str, Any]] = None, + ): """ Compute predictions from the specified deployment using the provided PyFunc input. 
@@ -2715,7 +2722,7 @@ def predict(self, deployment_name=None, inputs=None, endpoint=None): ) response = sage_client.invoke_endpoint( EndpointName=deployment_name, - Body=dump_input_data(inputs, inputs_key="instances"), + Body=dump_input_data(inputs, inputs_key="instances", params=params), ContentType="application/json", ) response_body = response["Body"].read().decode("utf-8") diff --git a/mlflow/sentence_transformers.py b/mlflow/sentence_transformers.py index cfbcd220bf2a53..5335bc475e5b0a 100644 --- a/mlflow/sentence_transformers.py +++ b/mlflow/sentence_transformers.py @@ -8,6 +8,7 @@ import mlflow from mlflow import pyfunc +from mlflow.exceptions import MlflowException from mlflow.models import ModelInputExample, ModelSignature, Model, infer_pip_requirements from mlflow.models.model import MLMODEL_FILE_NAME from mlflow.models.signature import _infer_signature_from_input_example @@ -332,7 +333,16 @@ class _SentenceTransformerModelWrapper: def __init__(self, model): self.model = model - def predict(self, sentences): + def predict(self, sentences, params: Optional[Dict[str, Any]] = None): + """ + :param sentences: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ # When the input is a single string, it is transformed into a DataFrame with one column # and row, but the encode function does not accept DataFrame input if type(sentences) == pd.DataFrame: @@ -340,4 +350,11 @@ def predict(self, sentences): # The encode API has additional parameters that we can add as kwargs. # See https://www.sbert.net/docs/package_reference/SentenceTransformer.html#sentence_transformers.SentenceTransformer.encode - return self.model.encode(sentences) # numpy array + if params: + try: + return self.model.encode(sentences, **params) + except TypeError as e: + raise MlflowException.invalid_parameter_value( + "Received invalid parameter value for `params` argument" + ) from e + return self.model.encode(sentences) diff --git a/mlflow/shap.py b/mlflow/shap.py index 108612edb349ea..5f1e2098bf5fdb 100644 --- a/mlflow/shap.py +++ b/mlflow/shap.py @@ -3,6 +3,7 @@ import tempfile import yaml import warnings +from typing import Any, Dict, Optional import numpy as np @@ -664,5 +665,16 @@ def __init__(self, path): self.explainer = _load_explainer(explainer_file=shap_explainer_artifacts_path, model=model) - def predict(self, dataframe): + def predict( + self, dataframe, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): + """ + :param dataframe: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. 
+ """ return self.explainer(dataframe.values).values diff --git a/mlflow/sklearn/__init__.py b/mlflow/sklearn/__init__.py index 37b678704be4c2..a402d679ffb54a 100644 --- a/mlflow/sklearn/__init__.py +++ b/mlflow/sklearn/__init__.py @@ -21,6 +21,7 @@ from collections import defaultdict, OrderedDict from copy import deepcopy from packaging.version import Version +from typing import Any, Dict, Optional import mlflow from mlflow import pyfunc @@ -489,7 +490,36 @@ def _load_pyfunc(path): ) path = os.path.join(path, pyfunc_flavor_conf["model_path"]) - return _load_model_from_local_file(path=path, serialization_format=serialization_format) + return _SklearnModelWrapper( + _load_model_from_local_file(path=path, serialization_format=serialization_format) + ) + + +class _SklearnModelWrapper: + def __init__(self, sklearn_model): + self.sklearn_model = sklearn_model + + def predict( + self, data, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): + """ + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ + return self.sklearn_model.predict(data) + + def predict_proba(self, *args, **kwargs): + if hasattr(self.sklearn_model, "predict_proba"): + return self.sklearn_model.predict_proba(*args, **kwargs) + + def score(self, *args, **kwargs): + if hasattr(self.sklearn_model, "score"): + return self.sklearn_model.score(*args, **kwargs) class _SklearnCustomModelPicklingError(pickle.PicklingError): diff --git a/mlflow/spacy.py b/mlflow/spacy.py index ea1e512f68e690..945d9569264a9c 100644 --- a/mlflow/spacy.py +++ b/mlflow/spacy.py @@ -11,6 +11,7 @@ """ import logging import os +from typing import Any, Dict, Optional import pandas as pd import yaml @@ -282,12 +283,19 @@ class _SpacyModelWrapper: def __init__(self, spacy_model): self.spacy_model = spacy_model - def predict(self, dataframe): + def predict( + self, dataframe, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): """ Only works for predicting using text categorizer. Not suitable for other pipeline components (e.g: parser) :param dataframe: pandas dataframe containing texts to be categorized expected shape is (n_rows,1 column) + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + :return: dataframe with predictions """ if len(dataframe.columns) != 1: diff --git a/mlflow/spark.py b/mlflow/spark.py index a6a60c816bc799..f7a5ccc7843b61 100644 --- a/mlflow/spark.py +++ b/mlflow/spark.py @@ -25,6 +25,7 @@ import shutil import yaml from packaging.version import Version +from typing import Any, Dict, Optional import pandas as pd import mlflow @@ -891,11 +892,18 @@ def __init__(self, spark, spark_model): self.spark = spark self.spark_model = spark_model - def predict(self, pandas_df): + def predict( + self, pandas_df, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): """ Generate predictions given input data in a pandas DataFrame. :param pandas_df: pandas DataFrame containing input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + :return: List with model predictions. 
""" from pyspark.ml import PipelineModel diff --git a/mlflow/statsmodels.py b/mlflow/statsmodels.py index 7fe4c160dce74e..b1dfd7a66a52da 100644 --- a/mlflow/statsmodels.py +++ b/mlflow/statsmodels.py @@ -15,6 +15,7 @@ import logging import os import yaml +from typing import Any, Dict, Optional import mlflow from mlflow import pyfunc @@ -327,7 +328,18 @@ class _StatsmodelsModelWrapper: def __init__(self, statsmodels_model): self.statsmodels_model = statsmodels_model - def predict(self, dataframe): + def predict( + self, dataframe, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): + """ + :param dataframe: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ from statsmodels.tsa.base.tsa_model import TimeSeriesModel model = self.statsmodels_model.model diff --git a/mlflow/tensorflow/__init__.py b/mlflow/tensorflow/__init__.py index d82175e84bbbb8..2d73bc69ec0587 100644 --- a/mlflow/tensorflow/__init__.py +++ b/mlflow/tensorflow/__init__.py @@ -18,6 +18,7 @@ import pandas from packaging.version import Version from threading import RLock +from typing import Any, Dict, Optional import numpy as np import importlib import yaml @@ -758,7 +759,18 @@ def __init__(self, model, infer): self.model = model self.infer = infer - def predict(self, data): + def predict( + self, data, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): + """ + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ import tensorflow feed_dict = {} @@ -802,7 +814,18 @@ def __init__(self, model, signature): self.model = model self.signature = signature - def predict(self, data): + def predict( + self, data, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): + """ + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ import tensorflow if isinstance(data, (np.ndarray, list)): @@ -823,7 +846,18 @@ def __init__(self, keras_model, signature): self.keras_model = keras_model self.signature = signature - def predict(self, data): + def predict( + self, data, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): + """ + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ if isinstance(data, pandas.DataFrame): # This line is for backwards compatibility: # If model signature is not None, when calling diff --git a/mlflow/transformers.py b/mlflow/transformers.py index a9175261ce531f..df7945d7df168d 100644 --- a/mlflow/transformers.py +++ b/mlflow/transformers.py @@ -1602,7 +1602,57 @@ def _convert_pandas_to_dict(self, data): ) return parsed - def predict(self, data, device=None): + def _override_inference_config(self, params): + if params: + _logger.warning( + "params provided to the `predict` method will override the inference " + "configuration saved with the model. 
If the params provided are not " + "valid for the pipeline, MlflowException will be raised." + ) + + # Override the inference configuration with any additional kwargs provided by the user. + self.inference_config.update(params) + + def _validate_inference_config_and_return_output(self, data): + import transformers + + try: + if isinstance(data, dict): + return self.pipeline(**data, **self.inference_config) + return self.pipeline(data, **self.inference_config) + except ValueError as e: + if "The following `model_kwargs` are not used by the model" in str(e): + raise MlflowException.invalid_parameter_value( + "The params provided to the `predict` method are not valid " + f"for pipeline {type(self.pipeline).__name__}.", + ) from e + if isinstance( + self.pipeline, + ( + transformers.AutomaticSpeechRecognitionPipeline, + transformers.AudioClassificationPipeline, + ), + ) and "Malformed soundfile" in str(e): + raise MlflowException.invalid_parameter_value( + "Failed to process the input audio data. Either the audio file is " + "corrupted or a uri was passed in without overriding the default model " + "signature. If submitting a string uri, please ensure that the model has " + "been saved with a signature that defines a string input type.", + ) from e + raise + + def predict(self, data, params: Optional[Dict[str, Any]] = None): + """ + :param data: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. + """ + self._override_inference_config(params) + if isinstance(data, pd.DataFrame): input_data = self._convert_pandas_to_dict(data) elif isinstance(data, dict): @@ -1640,11 +1690,11 @@ def predict(self, data, device=None): for x in input_data ) - predictions = self._predict(input_data, device) + predictions = self._predict(input_data) return predictions - def _predict(self, data, device): + def _predict(self, data): import transformers # NB: the ordering of these conditional statements matters. TranslationPipeline and @@ -1712,8 +1762,6 @@ def _predict(self, data, device): include_prompt = self.inference_config.pop("include_prompt", True) # Optional stripping out of `\n` for specific generator pipelines. collapse_whitespace = self.inference_config.pop("collapse_whitespace", False) - if device is not None: - self.inference_config["device"] = device data = self._convert_cast_lists_from_np_back_to_list(data) @@ -1721,30 +1769,8 @@ def _predict(self, data, device): if isinstance(self.pipeline, transformers.ConversationalPipeline): conversation_output = self.pipeline(self._conversation) return conversation_output.generated_responses[-1] - - if isinstance( - self.pipeline, - ( - transformers.AutomaticSpeechRecognitionPipeline, - transformers.AudioClassificationPipeline, - ), - ): - try: - raw_output = self.pipeline(data, **self.inference_config) - except ValueError as e: - if "Malformed soundfile" in str(e): - raise MlflowException( - "Failed to process the input audio data. Either the audio file is " - "corrupted or a uri was passed in without overriding the default model " - "signature. 
If submitting a string uri, please ensure that the model has " - "been saved with a signature that defines a string input type.", - error_code=INVALID_PARAMETER_VALUE, - ) from e - raise - elif isinstance(data, dict): - raw_output = self.pipeline(**data, **self.inference_config) else: - raw_output = self.pipeline(data, **self.inference_config) + raw_output = self._validate_inference_config_and_return_output(data) # Handle the pipeline outputs if type(self.pipeline).__name__ in self._supported_custom_generator_types or isinstance( @@ -2079,12 +2105,12 @@ def _parse_lists_of_dict_to_list_of_str(self, output_data, target_dict_key) -> L elif isinstance(value, list) and all( isinstance(elem, dict) for elem in value ): - output_coll.append( - self._parse_lists_of_dict_to_list_of_str(value, target_dict_key)[0] + output_coll.extend( + self._parse_lists_of_dict_to_list_of_str(value, target_dict_key) ) elif isinstance(output, list): - output_coll.append( - self._parse_lists_of_dict_to_list_of_str(output, target_dict_key)[0] + output_coll.extend( + self._parse_lists_of_dict_to_list_of_str(output, target_dict_key) ) return output_coll elif target_dict_key: diff --git a/mlflow/types/__init__.py b/mlflow/types/__init__.py index 60511d26a04cd0..36e7a58f1575ec 100644 --- a/mlflow/types/__init__.py +++ b/mlflow/types/__init__.py @@ -3,6 +3,6 @@ components to describe interface independent of other frameworks or languages. """ -from .schema import DataType, ColSpec, Schema, TensorSpec +from .schema import DataType, ColSpec, ParamSchema, Schema, TensorSpec, ParamSpec -__all__ = ["Schema", "ColSpec", "DataType", "TensorSpec"] +__all__ = ["Schema", "ColSpec", "DataType", "TensorSpec", "ParamSchema", "ParamSpec"] diff --git a/mlflow/types/schema.py b/mlflow/types/schema.py index a3bcf4bf0ed5ce..ca51876bfe9067 100644 --- a/mlflow/types/schema.py +++ b/mlflow/types/schema.py @@ -1,9 +1,12 @@ -import json +import builtins +import datetime as dt from enum import Enum +import importlib.util +import json import numpy as np import string -from typing import Dict, Any, List, Union, Optional +from typing import Dict, Any, List, Union, Optional, Tuple, TypedDict from mlflow.exceptions import MlflowException from mlflow.utils.annotations import experimental @@ -14,33 +17,40 @@ class DataType(Enum): MLflow data types. """ - def __new__(cls, value, numpy_type, spark_type, pandas_type=None): + def __new__(cls, value, numpy_type, spark_type, pandas_type=None, python_type=None): res = object.__new__(cls) res._value_ = value res._numpy_type = numpy_type res._spark_type = spark_type res._pandas_type = pandas_type if pandas_type is not None else numpy_type + res._python_type = python_type if python_type is not None else numpy_type return res # NB: We only use pandas extension type for strings. There are also pandas extension types for # integers and boolean values. We do not use them here for now as most downstream tools are # most likely to use / expect native numpy types and would not be compatible with the extension # types. - boolean = (1, np.dtype("bool"), "BooleanType") + boolean = (1, np.dtype("bool"), "BooleanType", np.dtype("bool"), bool) """Logical data (True, False) .""" - integer = (2, np.dtype("int32"), "IntegerType") + integer = (2, np.dtype("int32"), "IntegerType", np.dtype("int32"), int) """32b signed integer numbers.""" - long = (3, np.dtype("int64"), "LongType") + long = (3, np.dtype("int64"), "LongType", np.dtype("int64"), int) """64b signed integer numbers. 
""" - float = (4, np.dtype("float32"), "FloatType") + float = (4, np.dtype("float32"), "FloatType", np.dtype("float32"), builtins.float) """32b floating point numbers. """ - double = (5, np.dtype("float64"), "DoubleType") + double = (5, np.dtype("float64"), "DoubleType", np.dtype("float64"), builtins.float) """64b floating point numbers. """ - string = (6, np.dtype("str"), "StringType", object) + string = (6, np.dtype("str"), "StringType", object, str) """Text data.""" - binary = (7, np.dtype("bytes"), "BinaryType", object) + binary = (7, np.dtype("bytes"), "BinaryType", object, bytes) """Sequence of raw bytes.""" - datetime = (8, np.dtype("datetime64[ns]"), "TimestampType") + datetime = ( + 8, + np.dtype("datetime64[ns]"), + "TimestampType", + np.dtype("datetime64[ns]"), + dt.date, + ) """64b datetime data.""" def __repr__(self): @@ -59,6 +69,50 @@ def to_spark(self): return getattr(pyspark.sql.types, self._spark_type)() + def to_python(self): + """Get equivalent python data type.""" + return self._python_type + + @classmethod + def is_boolean(cls, value): + return type(value) in DataType.boolean.get_all_types() + + @classmethod + def is_integer(cls, value): + return type(value) in DataType.integer.get_all_types() + + @classmethod + def is_long(cls, value): + return type(value) in DataType.long.get_all_types() + + @classmethod + def is_float(cls, value): + return type(value) in DataType.float.get_all_types() + + @classmethod + def is_double(cls, value): + return type(value) in DataType.double.get_all_types() + + @classmethod + def is_string(cls, value): + return type(value) in DataType.string.get_all_types() + + @classmethod + def is_binary(cls, value): + return type(value) in DataType.binary.get_all_types() + + @classmethod + def is_datetime(cls, value): + return type(value) in DataType.datetime.get_all_types() + + def get_all_types(self): + types = [self.to_numpy(), self.to_pandas(), self.to_python()] + if importlib.util.find_spec("pyspark") is not None: + types.append(self.to_spark()) + if self.name == "datetime": + types.extend([np.datetime64, dt.datetime]) + return types + @classmethod def get_spark_types(cls): return [dt.to_spark() for dt in cls._member_map_.values()] @@ -414,3 +468,292 @@ def __eq__(self, other) -> bool: def __repr__(self) -> str: return repr(self.inputs) + + +@experimental +class ParamSpec: + """ + Specification used to represent parameters for the model. 
+ """ + + def __init__( + self, + name: str, + dtype: Union[DataType, str], + default: Union[DataType, List[DataType], None], + shape: Optional[Tuple[int, ...]] = None, + ): + self._name = str(name) + self._shape = tuple(shape) if shape is not None else None + + try: + self._dtype = DataType[dtype] if isinstance(dtype, str) else dtype + except KeyError: + supported_types = [t.name for t in DataType if t.name != "binary"] + raise MlflowException.invalid_parameter_value( + f"Unsupported type '{dtype}', expected instance of DataType or " + f"one of {supported_types}", + ) + if not isinstance(self.dtype, DataType): + raise TypeError( + "Expected mlflow.models.signature.Datatype or str for the 'dtype' " + f"argument, but got {self.dtype.__class__}" + ) + if self.dtype == DataType.binary: + raise MlflowException.invalid_parameter_value( + f"Binary type is not supported for parameters, ParamSpec '{self.name}'" + "has dtype 'binary'", + ) + + # This line makes sure repr(self) works fine + self._default = default + self._default = self.validate_type_and_shape(repr(self), default, self.dtype, self.shape) + + @classmethod + def validate_param_spec( + cls, value: Union[DataType, List[DataType], None], param_spec: "ParamSpec" + ): + return cls.validate_type_and_shape( + repr(param_spec), value, param_spec.dtype, param_spec.shape + ) + + @classmethod + def enforce_param_datatype(cls, name, value, dtype: DataType): + """ + Enforce the value matches the data type. + + The following type conversions are allowed: + + 1. int -> long, float, double + 2. long -> float, double + 3. float -> double + 4. any -> datetime (try conversion) + + Any other type mismatch will raise error. + + :param name: parameter name + :param value: parameter value + :param t: expected data type + """ + if value is None: + return + + if dtype == DataType.datetime: + try: + datetime_value = np.datetime64(value).item() + if isinstance(datetime_value, int): + raise MlflowException.invalid_parameter_value( + f"Invalid value for param {name}, it should " + f"be convertible to datetime.date/datetime, got {value}" + ) + return datetime_value + except ValueError as e: + raise MlflowException.invalid_parameter_value( + f"Failed to convert value {value} from type {type(value).__name__} " + f"to {dtype} for param {name}" + ) from e + + # Note that np.isscalar(datetime.date(...)) is False + if not np.isscalar(value): + raise MlflowException.invalid_parameter_value( + f"Value should be a scalar for param {name}, got {value}" + ) + + # Always convert to python native type for params + if getattr(DataType, f"is_{dtype.name}")(value): + return DataType[dtype.name].to_python()(value) + + if ( + ( + DataType.is_integer(value) + and dtype in (DataType.long, DataType.float, DataType.double) + ) + or (DataType.is_long(value) and dtype in (DataType.float, DataType.double)) + or (DataType.is_float(value) and dtype == DataType.double) + ): + try: + return DataType[dtype.name].to_python()(value) + except ValueError as e: + raise MlflowException.invalid_parameter_value( + f"Failed to convert value {value} from type {type(value).__name__} " + f"to {dtype} for param {name}" + ) from e + + raise MlflowException.invalid_parameter_value( + f"Incompatible types for param {name}. 
Can not safely convert {type(value).__name__} " + f"to {dtype}.", + ) + + @classmethod + def validate_type_and_shape( + cls, + spec: str, + value: Union[DataType, List[DataType], None], + value_type: DataType, + shape: Optional[Tuple[int, ...]], + ): + """ + Validate that the value has the expected type and shape. + """ + + def _is_1d_array(value): + return isinstance(value, (list, np.ndarray)) and np.array(value).ndim == 1 + + if shape is None: + return cls.enforce_param_datatype(f"{spec} with shape None", value, value_type) + elif shape == (-1,): + if not _is_1d_array(value): + raise MlflowException.invalid_parameter_value( + f"Value must be a 1D array with shape (-1,) for param {spec}, " + f"received {type(value).__name__} with ndim {np.array(value).ndim}", + ) + return [ + cls.enforce_param_datatype(f"{spec} internal values", v, value_type) for v in value + ] + else: + raise MlflowException.invalid_parameter_value( + "Shape must be None for scalar value or (-1,) for 1D array value " + f"for ParamSpec {spec}), received {shape}", + ) + + @property + def name(self) -> str: + """The name of the parameter.""" + return self._name + + @property + def dtype(self) -> DataType: + """The parameter data type.""" + return self._dtype + + @property + def default(self) -> Union[DataType, List[DataType], None]: + """Default value of the parameter.""" + return self._default + + @property + def shape(self) -> Optional[tuple]: + """ + The parameter shape. + If shape is None, the parameter is a scalar. + """ + return self._shape + + class ParamSpecTypedDict(TypedDict): + name: str + dtype: str + default: Union[DataType, List[DataType], None] + shape: Optional[Tuple[int, ...]] + + def to_dict(self) -> ParamSpecTypedDict: + if self.shape is None: + default_value = ( + self.default.isoformat() if self.dtype.name == "datetime" else self.default + ) + elif self.shape == (-1,): + default_value = ( + [v.isoformat() for v in self.default] + if self.dtype.name == "datetime" + else self.default + ) + return { + "name": self.name, + "dtype": self.dtype.name, + "default": default_value, + "shape": self.shape, + } + + def __eq__(self, other) -> bool: + if isinstance(other, ParamSpec): + return ( + self.name == other.name + and self.dtype == other.dtype + and self.default == other.default + and self.shape == other.shape + ) + return False + + def __repr__(self) -> str: + shape = f" (shape: {self.shape})" if self.shape is not None else "" + return f"{self.name!r}: {self.dtype!r} (default: {self.default}){shape}" + + @classmethod + def from_json_dict(cls, **kwargs): + """ + Deserialize from a json loaded dictionary. + The dictionary is expected to contain `name`, `dtype` and `default` keys. + """ + if not {"name", "dtype", "default"} <= set(kwargs.keys()): + raise MlflowException.invalid_parameter_value( + "Missing keys in ParamSpec JSON. Expected to find " + "keys `name`, `dtype` and `default`", + ) + return cls( + name=str(kwargs["name"]), + dtype=DataType[kwargs["dtype"]], + default=kwargs["default"], + shape=kwargs.get("shape"), + ) + + +@experimental +class ParamSchema: + """ + Specification of parameters applicable to the model. + ParamSchema is represented as a list of :py:class:`ParamSpec`. 
+ """ + + def __init__(self, params: List[ParamSpec]): + if not all(isinstance(x, ParamSpec) for x in params): + raise MlflowException.invalid_parameter_value( + f"ParamSchema inputs only accept {ParamSchema.__class__}" + ) + if duplicates := self._find_duplicates(params): + raise MlflowException.invalid_parameter_value( + f"Duplicated parameters found in schema: {duplicates}" + ) + self._params = params + + @staticmethod + def _find_duplicates(params: List[ParamSpec]) -> List[str]: + param_names = [param_spec.name for param_spec in params] + uniq_param = set() + duplicates = [] + for name in param_names: + if name in uniq_param: + duplicates.append(name) + else: + uniq_param.add(name) + return duplicates + + def __len__(self): + return len(self._params) + + def __iter__(self): + return iter(self._params) + + @property + def params(self) -> List[ParamSpec]: + """Representation of ParamSchema as a list of ParamSpec.""" + return self._params + + def to_json(self) -> str: + """Serialize into json string.""" + return json.dumps(self.to_dict()) + + @classmethod + def from_json(cls, json_str: str): + """Deserialize from a json string.""" + return cls([ParamSpec.from_json_dict(**x) for x in json.loads(json_str)]) + + def to_dict(self) -> List[Dict[str, Any]]: + """Serialize into a jsonable dictionary.""" + return [x.to_dict() for x in self.params] + + def __eq__(self, other) -> bool: + if isinstance(other, ParamSchema): + return self.params == other.params + return False + + def __repr__(self) -> str: + return repr(self.params) diff --git a/mlflow/types/utils.py b/mlflow/types/utils.py index 066b4a3d8f7993..d404b6947a3f8c 100644 --- a/mlflow/types/utils.py +++ b/mlflow/types/utils.py @@ -8,7 +8,7 @@ from mlflow.exceptions import MlflowException from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE from mlflow.types import DataType -from mlflow.types.schema import Schema, ColSpec, TensorSpec +from mlflow.types.schema import ColSpec, ParamSchema, ParamSpec, Schema, TensorSpec _logger = logging.getLogger(__name__) @@ -470,3 +470,52 @@ def _infer_schema_from_type_hint(type_hint, examples=None): else: _logger.info("Unsupported type hint: %s, skipping schema inference", type_hint) return None + + +def _infer_type_and_shape(value): + if isinstance(value, (list, np.ndarray, pd.Series)): + ndim = np.array(value).ndim + if ndim != 1: + raise MlflowException.invalid_parameter_value( + f"Expected parameters to be 1D array or scalar, got {ndim}D array", + ) + if all(DataType.is_datetime(v) for v in value): + return DataType.datetime, (-1,) + value_type = _infer_numpy_dtype(np.array(value).dtype) + return value_type, (-1,) + elif DataType.is_datetime(value): + return DataType.datetime, None + elif np.isscalar(value): + try: + value_type = _infer_numpy_dtype(np.array(value).dtype) + return value_type, None + except (Exception, MlflowException) as e: + raise MlflowException.invalid_parameter_value( + f"Failed to infer schema for parameter {value}: {e!r}" + ) + raise MlflowException.invalid_parameter_value( + f"Expected parameters to be 1D array or scalar, got {type(value).__name__}", + ) + + +def _infer_param_schema(parameters: Dict[str, Any]): + if not isinstance(parameters, dict): + raise MlflowException.invalid_parameter_value( + f"Expected parameters to be dict, got {type(parameters).__name__}", + ) + + param_specs = [] + invalid_params = [] + for name, value in parameters.items(): + try: + value_type, shape = _infer_type_and_shape(value) + param_specs.append(ParamSpec(name=name, dtype=value_type, 
default=value, shape=shape)) + except Exception as e: + invalid_params.append((name, value, e)) + + if invalid_params: + raise MlflowException.invalid_parameter_value( + f"Failed to infer schema for parameters: {invalid_params}", + ) + + return ParamSchema(param_specs) diff --git a/mlflow/utils/proto_json_utils.py b/mlflow/utils/proto_json_utils.py index 09329bf6fa1981..22f0b8dfb88351 100644 --- a/mlflow/utils/proto_json_utils.py +++ b/mlflow/utils/proto_json_utils.py @@ -1,5 +1,6 @@ import base64 import datetime +from typing import Any, Dict, Optional import os import json @@ -475,7 +476,15 @@ def get_jsonable_input(name, data): raise MlflowException(f"Incompatible input type:{type(data)} for input {name}.") -def dump_input_data(data, inputs_key="inputs"): +def dump_input_data(data, inputs_key="inputs", params: Optional[Dict[str, Any]] = None): + """ + :param data: Input data. + :param inputs_key: Key to represent data in the request payload. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + """ import numpy as np import pandas as pd @@ -485,9 +494,20 @@ def dump_input_data(data, inputs_key="inputs"): post_data = {inputs_key: {k: get_jsonable_input(k, v) for k, v in data}} elif isinstance(data, np.ndarray): post_data = {inputs_key: data.tolist()} + elif isinstance(data, list): + post_data = {inputs_key: data} else: post_data = data + if params is not None: + if not isinstance(params, dict): + raise MlflowException( + f"Params must be a dictionary. Got type '{type(params).__name__}'." + ) + # if post_data is not dictionary, params should be included in post_data directly + if isinstance(post_data, dict): + post_data["params"] = params + if not isinstance(post_data, str): post_data = json.dumps(post_data, cls=_CustomJsonEncoder) diff --git a/mlflow/xgboost/__init__.py b/mlflow/xgboost/__init__.py index 90357e29506a57..be5783bf3a7223 100644 --- a/mlflow/xgboost/__init__.py +++ b/mlflow/xgboost/__init__.py @@ -24,6 +24,7 @@ import functools from copy import deepcopy from packaging.version import Version +from typing import Any, Dict, Optional import mlflow from mlflow import pyfunc @@ -345,7 +346,18 @@ class _XGBModelWrapper: def __init__(self, xgb_model): self.xgb_model = xgb_model - def predict(self, dataframe): + def predict( + self, dataframe, params: Optional[Dict[str, Any]] = None # pylint: disable=unused-argument + ): + """ + :param dataframe: Model input data. + :param params: Additional parameters to pass to the model for inference. + + .. Note:: Experimental: This parameter may change or be removed in a future + release without warning. + + :return: Model predictions. 
+ """ import xgboost as xgb if isinstance(self.xgb_model, xgb.Booster): diff --git a/tests/db/check_migration.py b/tests/db/check_migration.py index 3e2e58b9ac4d26..98544524e82cc6 100644 --- a/tests/db/check_migration.py +++ b/tests/db/check_migration.py @@ -51,7 +51,7 @@ class Model(mlflow.pyfunc.PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return [0] diff --git a/tests/db/test_tracking_operations.py b/tests/db/test_tracking_operations.py index 0f7ca841e8640d..e9e5a011d121e6 100644 --- a/tests/db/test_tracking_operations.py +++ b/tests/db/test_tracking_operations.py @@ -17,7 +17,7 @@ class Model(mlflow.pyfunc.PythonModel): def load_context(self, context): pass - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): pass diff --git a/tests/evaluate/test_default_evaluator.py b/tests/evaluate/test_default_evaluator.py index ab92b5c847bd5f..4841b2c30036cf 100644 --- a/tests/evaluate/test_default_evaluator.py +++ b/tests/evaluate/test_default_evaluator.py @@ -11,6 +11,7 @@ import mlflow from mlflow.exceptions import MlflowException +from mlflow.models import Model from mlflow.models.evaluation.base import evaluate, make_metric from mlflow.models.evaluation.artifacts import ( CsvEvaluationArtifact, @@ -41,6 +42,7 @@ from sklearn.preprocessing import FunctionTransformer from sklearn.datasets import load_iris, load_breast_cancer from sklearn.metrics import precision_score, recall_score, f1_score +from sklearn.svm import LinearSVC from tempfile import TemporaryDirectory from os.path import join as path_join @@ -2377,3 +2379,36 @@ def test_eval_results_table_json_can_be_prefixed_with_metric_prefix(): client = mlflow.MlflowClient() artifacts = [a.path for a in client.list_artifacts(run.info.run_id)] assert f"{metric_prefix}eval_results_table.json" in artifacts + + +@pytest.mark.parametrize( + "baseline_model_uri", + [("svm_model_uri")], + indirect=["baseline_model_uri"], +) +def test_default_evaluator_for_pyfunc_model(baseline_model_uri, breast_cancer_dataset): + data = load_breast_cancer() + raw_model = LinearSVC() + raw_model.fit(data.data, data.target) + + mlflow_model = Model() + mlflow.pyfunc.add_to_model(mlflow_model, loader_module="mlflow.sklearn") + pyfunc_model = mlflow.pyfunc.PyFuncModel(model_meta=mlflow_model, model_impl=raw_model) + + with mlflow.start_run() as run: + evaluate_model_helper( + pyfunc_model, + baseline_model_uri, + breast_cancer_dataset._constructor_args["data"], + model_type="classifier", + targets=breast_cancer_dataset._constructor_args["targets"], + evaluators="default", + eval_baseline_model_only=False, + ) + run_data = get_run_data(run.info.run_id) + assert set(run_data.artifacts) == { + "confusion_matrix.png", + "shap_feature_importance_plot.png", + "shap_beeswarm_plot.png", + "shap_summary_plot.png", + } diff --git a/tests/evaluate/test_evaluation.py b/tests/evaluate/test_evaluation.py index 30c6c5f7cfc78a..d3153638b5607e 100644 --- a/tests/evaluate/test_evaluation.py +++ b/tests/evaluate/test_evaluation.py @@ -1138,7 +1138,7 @@ class EnvRestoringTestModel(mlflow.pyfunc.PythonModel): def __init__(self): pass - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): if sklearn.__version__ == "0.22.1": pred_value = 1 else: diff --git a/tests/openai/test_openai_model_export.py b/tests/openai/test_openai_model_export.py index 51fb3c310d926f..2ac8e232b92c7d 100644 --- a/tests/openai/test_openai_model_export.py +++ 
b/tests/openai/test_openai_model_export.py @@ -346,7 +346,7 @@ def test_spark_udf(tmp_path, spark): class ChatCompletionModel(mlflow.pyfunc.PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): completion = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=[{"role": "user", "content": "What is MLflow?"}], diff --git a/tests/pyfunc/test_model_export_with_class_and_artifacts.py b/tests/pyfunc/test_model_export_with_class_and_artifacts.py index e2a83c41bdd2c3..fb5df1965ac97f 100644 --- a/tests/pyfunc/test_model_export_with_class_and_artifacts.py +++ b/tests/pyfunc/test_model_export_with_class_and_artifacts.py @@ -62,7 +62,7 @@ def load_context(self, context): # pylint: disable=attribute-defined-outside-init self.model = mlflow.sklearn.load_model(model_uri=context.artifacts["sk_model"]) - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return self.predict_fn(self.model, model_input) return CustomSklearnModel @@ -210,6 +210,49 @@ def test_predict(sk_model, model_input): ) +def test_python_model_predict_compatible_without_params(sklearn_knn_model, iris_data): + class CustomSklearnModelWithoutParams(mlflow.pyfunc.PythonModel): + def __init__(self, predict_fn): + self.predict_fn = predict_fn + + def load_context(self, context): + super().load_context(context) + # pylint: disable=attribute-defined-outside-init + self.model = mlflow.sklearn.load_model(model_uri=context.artifacts["sk_model"]) + + def predict(self, context, model_input): + return self.predict_fn(self.model, model_input) + + sklearn_artifact_path = "sk_model" + with mlflow.start_run(): + model_info = mlflow.sklearn.log_model( + sk_model=sklearn_knn_model, artifact_path=sklearn_artifact_path + ) + sklearn_model_uri = model_info.model_uri + + def test_predict(sk_model, model_input): + return sk_model.predict(model_input) * 2 + + pyfunc_artifact_path = "pyfunc_model" + with mlflow.start_run() as run: + model_info = mlflow.pyfunc.log_model( + artifact_path=pyfunc_artifact_path, + artifacts={"sk_model": sklearn_model_uri}, + python_model=CustomSklearnModelWithoutParams(test_predict), + ) + pyfunc_model_uri = f"runs:/{run.info.run_id}/{pyfunc_artifact_path}" + assert model_info.model_uri == pyfunc_model_uri + pyfunc_model_path = _download_artifact_from_uri(pyfunc_model_uri) + model_config = Model.load(os.path.join(pyfunc_model_path, "MLmodel")) + + loaded_pyfunc_model = mlflow.pyfunc.load_model(model_uri=pyfunc_model_uri) + assert model_config.to_yaml() == loaded_pyfunc_model.metadata.to_yaml() + np.testing.assert_array_equal( + loaded_pyfunc_model.predict(iris_data[0]), + test_predict(sk_model=sklearn_knn_model, model_input=iris_data[0]), + ) + + def test_signature_and_examples_are_saved_correctly(iris_data, main_scoped_model_class, tmp_path): sklearn_model_path = str(tmp_path.joinpath("sklearn_model")) mlflow.sklearn.save_model(sk_model=sklearn_knn_model, path=sklearn_model_path) @@ -798,7 +841,7 @@ def test_save_model_correctly_resolves_directory_artifact_with_nested_contents( f.write(nested_file_text) class ArtifactValidationModel(mlflow.pyfunc.PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): expected_file_path = os.path.join( context.artifacts["testdir"], nested_file_relative_path ) @@ -937,7 +980,7 @@ def test_repr_can_be_called_withtout_run_id_or_artifact_path(): ) class TestModel: - def predict(self, model_input): + def predict(self, model_input, 
params=None): return model_input model_impl = TestModel() @@ -949,7 +992,7 @@ def test_load_model_with_differing_cloudpickle_version_at_micro_granularity_logs model_path, ): class TestModel(mlflow.pyfunc.PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return model_input mlflow.pyfunc.save_model(path=model_path, python_model=TestModel()) @@ -984,7 +1027,7 @@ def custom_warn(message_text, *args, **kwargs): def test_load_model_with_missing_cloudpickle_version_logs_warning(model_path): class TestModel(mlflow.pyfunc.PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return model_input mlflow.pyfunc.save_model(path=model_path, python_model=TestModel()) @@ -1092,7 +1135,7 @@ def __init__(self) -> None: self.model = LinearRegression() - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return self.model.predict(model_input) @@ -1349,7 +1392,7 @@ def test_functional_python_model_throws_when_required_arguments_are_missing(tmp_ class AnnotatedPythonModel(mlflow.pyfunc.PythonModel): - def predict(self, context: Dict[str, Any], model_input: List[str]) -> List[str]: + def predict(self, context: Dict[str, Any], model_input: List[str], params=None) -> List[str]: assert isinstance(model_input, list) assert all(isinstance(x, str) for x in model_input) return model_input @@ -1362,3 +1405,18 @@ def test_class_python_model_type_hints(tmp_path): assert model.signature.outputs.to_dict() == [{"type": "string"}] model = mlflow.pyfunc.load_model(tmp_path) assert model.predict(["a", "b"]) == ["a", "b"] + + +def test_python_model_predict_with_params(): + signature = infer_signature(["input1", "input2"], params={"foo": [8]}) + + with mlflow.start_run(): + model_info = mlflow.pyfunc.log_model( + python_model=AnnotatedPythonModel(), + artifact_path="test_model", + signature=signature, + ) + + loaded_model = mlflow.pyfunc.load_model(model_info.model_uri) + assert loaded_model.predict(["a", "b"], params={"foo": [0, 1]}) == ["a", "b"] + assert loaded_model.predict(["a", "b"], params={"foo": np.array([0, 1])}) == ["a", "b"] diff --git a/tests/pyfunc/test_pyfunc_class_methods.py b/tests/pyfunc/test_pyfunc_class_methods.py index 28a08712d6925c..da9767048aa65e 100644 --- a/tests/pyfunc/test_pyfunc_class_methods.py +++ b/tests/pyfunc/test_pyfunc_class_methods.py @@ -8,7 +8,7 @@ def __init__(self, param_1: str, param_2: int): self.param_1 = param_1 self.param_2 = param_2 - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return model_input + self.param_2 def upper_param_1(self): diff --git a/tests/pyfunc/test_pyfunc_schema_enforcement.py b/tests/pyfunc/test_pyfunc_schema_enforcement.py index b0f12e97095f81..380ca0148ce5d7 100644 --- a/tests/pyfunc/test_pyfunc_schema_enforcement.py +++ b/tests/pyfunc/test_pyfunc_schema_enforcement.py @@ -1,27 +1,114 @@ import base64 +import cloudpickle +import datetime import decimal +import json import numpy as np +from packaging.version import Version import pandas as pd import pytest import re import sklearn.linear_model +from unittest import mock import mlflow from mlflow.exceptions import MlflowException from mlflow.models import infer_signature, Model, ModelSignature -from mlflow.models.utils import _enforce_schema +from mlflow.models.utils import _enforce_params_schema, _enforce_schema from mlflow.pyfunc import PyFuncModel +import mlflow.pyfunc.scoring_server as 
pyfunc_scoring_server +from mlflow.types import Schema, ColSpec, TensorSpec, ParamSchema, ParamSpec, DataType +from mlflow.utils.proto_json_utils import dump_input_data -from mlflow.types import Schema, ColSpec, TensorSpec +from tests.helper_functions import pyfunc_serve_and_score_model class TestModel: @staticmethod - def predict(pdf): + def predict(pdf, params=None): return pdf +@pytest.fixture(scope="module") +def sample_params_basic(): + return { + "str_param": "str_a", + "int_param": np.int32(1), + "bool_param": True, + "double_param": 1.0, + "float_param": np.float32(0.1), + "long_param": 100, + "datetime_param": np.datetime64("2023-06-26 00:00:00"), + "str_list": ["a", "b", "c"], + "bool_list": [True, False], + "double_array": np.array([1.0, 2.0]), + } + + +@pytest.fixture(scope="module") +def param_schema_basic(): + return ParamSchema( + [ + ParamSpec("str_param", DataType.string, "str_a", None), + ParamSpec("int_param", DataType.integer, np.int32(1), None), + ParamSpec("bool_param", DataType.boolean, True, None), + ParamSpec("double_param", DataType.double, 1.0, None), + ParamSpec("float_param", DataType.float, np.float32(0.1), None), + ParamSpec("long_param", DataType.long, 100, None), + ParamSpec( + "datetime_param", DataType.datetime, np.datetime64("2023-06-26 00:00:00"), None + ), + ParamSpec("str_list", DataType.string, ["a", "b", "c"], (-1,)), + ParamSpec("bool_list", DataType.boolean, [True, False], (-1,)), + ParamSpec("double_array", DataType.double, [1.0, 2.0], (-1,)), + ] + ) + + +class PythonModelWithBasicParams(mlflow.pyfunc.PythonModel): + def predict(self, context, model_input, params=None): + assert isinstance(params, dict) + assert DataType.is_string(params["str_param"]) + assert DataType.is_integer(params["int_param"]) + assert DataType.is_boolean(params["bool_param"]) + assert DataType.is_double(params["double_param"]) + assert DataType.is_float(params["float_param"]) + assert DataType.is_long(params["long_param"]) + assert DataType.is_datetime(params["datetime_param"]) + assert isinstance(params["str_list"], list) + assert all(DataType.is_string(x) for x in params["str_list"]) + assert isinstance(params["bool_list"], list) + assert all(DataType.is_boolean(x) for x in params["bool_list"]) + assert isinstance(params["double_array"], list) + assert all(DataType.is_double(x) for x in params["double_array"]) + return params + + +@pytest.fixture(scope="module") +def sample_params_with_arrays(): + return { + "int_array": np.array([np.int32(1), np.int32(2)]), + "double_array": np.array([1.0, 2.0]), + "float_array": np.array([np.float32(1.0), np.float32(2.0)]), + "long_array": np.array([1, 2]), + "datetime_array": np.array( + [np.datetime64("2023-06-26 00:00:00"), np.datetime64("2023-06-26 00:00:00")] + ), + } + + +class PythonModelWithArrayParams(mlflow.pyfunc.PythonModel): + def predict(self, context, model_input, params=None): + assert isinstance(params, dict) + assert all(DataType.is_integer(x) for x in params["int_array"]) + assert all(DataType.is_double(x) for x in params["double_array"]) + assert all(DataType.is_float(x) for x in params["float_array"]) + assert all(DataType.is_long(x) for x in params["long_array"]) + assert all(DataType.is_datetime(x) for x in params["datetime_array"]) + return params + + def test_schema_enforcement_single_column_2d_array(): X = np.array([[1], [2], [3]]) y = np.array([1, 2, 3]) @@ -914,3 +1001,866 @@ def test_schema_enforcement_for_list_inputs(): pd_data = pd.DataFrame([data]) pd_check = 
_enforce_schema(pd_data.to_dict(orient="list"), signature.inputs) pd.testing.assert_frame_equal(pd_check, pd_data) + + +def test_enforce_params_schema_with_success(): + # Correct parameters & schema + test_parameters = { + "str_param": "str_a", + "int_param": np.int32(1), + "bool_param": True, + "double_param": 1.0, + "float_param": np.float32(0.1), + "long_param": 100, + "datetime_param": np.datetime64("2023-06-26 00:00:00"), + "str_list": ["a", "b", "c"], + "bool_list": [True, False], + } + test_schema = ParamSchema( + [ + ParamSpec("str_param", DataType.string, "str_a", None), + ParamSpec("int_param", DataType.integer, np.int32(1), None), + ParamSpec("bool_param", DataType.boolean, True, None), + ParamSpec("double_param", DataType.double, 1.0, None), + ParamSpec("float_param", DataType.float, np.float32(0.1), None), + ParamSpec("long_param", DataType.long, 100, None), + ParamSpec( + "datetime_param", DataType.datetime, np.datetime64("2023-06-26 00:00:00"), None + ), + ParamSpec("str_list", DataType.string, ["a", "b", "c"], (-1,)), + ParamSpec("bool_list", DataType.boolean, [True, False], (-1,)), + ] + ) + assert _enforce_params_schema(test_parameters, test_schema) == test_parameters + + # Correct parameters & schema with array + params = { + "double_array": np.array([1.0, 2.0]), + "float_array": np.array([np.float32(1.0), np.float32(2.0)]), + "long_array": np.array([1, 2]), + "datetime_array": np.array( + [np.datetime64("2023-06-26 00:00:00"), np.datetime64("2023-06-26 00:00:00")] + ), + } + schema = ParamSchema( + [ + ParamSpec("double_array", DataType.double, np.array([1.0, 2.0]), (-1,)), + ParamSpec( + "float_array", DataType.float, np.array([np.float32(1.0), np.float32(2.0)]), (-1,) + ), + ParamSpec("long_array", DataType.long, np.array([1, 2]), (-1,)), + ParamSpec( + "datetime_array", + DataType.datetime, + np.array( + [np.datetime64("2023-06-26 00:00:00"), np.datetime64("2023-06-26 00:00:00")] + ), + (-1,), + ), + ] + ) + for param, value in params.items(): + assert (_enforce_params_schema(params, schema)[param] == value).all() + + # Converting parameters value type to corresponding schema type + # 1. int -> long, float, double + assert _enforce_params_schema({"double_param": np.int32(1)}, test_schema)["double_param"] == 1.0 + assert _enforce_params_schema({"float_param": np.int32(1)}, test_schema)["float_param"] == 1.0 + assert _enforce_params_schema({"long_param": np.int32(1)}, test_schema)["long_param"] == 1 + # With array + for param in ["double_array", "float_array", "long_array"]: + assert ( + _enforce_params_schema({param: [np.int32(1), np.int32(2)]}, schema)[param] + == params[param] + ).all() + assert ( + _enforce_params_schema({param: np.array([np.int32(1), np.int32(2)])}, schema)[param] + == params[param] + ).all() + + # 2. long -> float, double + assert _enforce_params_schema({"double_param": 1}, test_schema)["double_param"] == 1.0 + assert _enforce_params_schema({"float_param": 1}, test_schema)["float_param"] == 1.0 + # With array + for param in ["double_array", "float_array"]: + assert (_enforce_params_schema({param: [1, 2]}, schema)[param] == params[param]).all() + assert ( + _enforce_params_schema({param: np.array([1, 2])}, schema)[param] == params[param] + ).all() + + # 3. 
float -> double + assert ( + _enforce_params_schema({"double_param": np.float32(1)}, test_schema)["double_param"] == 1.0 + ) + assert np.isclose( + _enforce_params_schema({"double_param": np.float32(0.1)}, test_schema)["double_param"], + 0.1, + atol=1e-6, + ) + # With array + assert ( + _enforce_params_schema({"double_array": [np.float32(1), np.float32(2)]}, schema)[ + "double_array" + ] + == params["double_array"] + ).all() + assert ( + _enforce_params_schema({"double_array": np.array([np.float32(1), np.float32(2)])}, schema)[ + "double_array" + ] + == params["double_array"] + ).all() + + # 4. any -> datetime (try conversion) + assert _enforce_params_schema({"datetime_param": "2023-07-01 00:00:00"}, test_schema)[ + "datetime_param" + ] == np.datetime64("2023-07-01 00:00:00") + + # With array + assert ( + _enforce_params_schema( + {"datetime_array": ["2023-06-26 00:00:00", "2023-06-26 00:00:00"]}, schema + )["datetime_array"] + == params["datetime_array"] + ).all() + assert ( + _enforce_params_schema( + {"datetime_array": np.array(["2023-06-26 00:00:00", "2023-06-26 00:00:00"])}, schema + )["datetime_array"] + == params["datetime_array"] + ).all() + + # Add default values if the parameter is not provided + test_parameters = {"a": "str_a"} + test_schema = ParamSchema( + [ParamSpec("a", DataType.string, ""), ParamSpec("b", DataType.long, 1)] + ) + updated_parameters = {"b": 1} + updated_parameters.update(test_parameters) + assert _enforce_params_schema(test_parameters, test_schema) == updated_parameters + + # Ignore values not specified in ParamSchema and log warning + test_parameters = {"a": "str_a", "invalid_param": "value"} + test_schema = ParamSchema([ParamSpec("a", DataType.string, "")]) + with mock.patch("mlflow.models.utils._logger.warning") as mock_warning: + assert _enforce_params_schema(test_parameters, test_schema) == {"a": "str_a"} + mock_warning.assert_called_once_with( + "Unrecognized params ['invalid_param'] are ignored for inference. " + "Supported params are: {'a'}. " + "To enable them, please add corresponding schema in ModelSignature." 
+    )
+
+    # Converting parameter keys to strings if they are not strings already
+    test_parameters = {1: 1.0}
+    test_schema = ParamSchema([ParamSpec("1", DataType.double, 1.0)])
+    assert _enforce_params_schema(test_parameters, test_schema) == {"1": 1.0}
+
+
+def test__enforce_params_schema_add_default_values():
+    class MyModel(mlflow.pyfunc.PythonModel):
+        def predict(self, ctx, model_input, params):
+            return list(params.values())
+
+    params = {"str_param": "string", "int_array": [1, 2, 3]}
+    signature = infer_signature(["input"], params=params)
+
+    with mlflow.start_run():
+        model_info = mlflow.pyfunc.log_model(
+            python_model=MyModel(), artifact_path="my_model", signature=signature
+        )
+
+    loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)
+
+    # Not passing params -- predict with default values
+    loaded_predict = loaded_model.predict(["input"])
+    assert loaded_predict == ["string", [1, 2, 3]]
+
+    # Passing some params -- add default values
+    loaded_predict = loaded_model.predict(["input"], params={"str_param": "new_string"})
+    assert loaded_predict == ["new_string", [1, 2, 3]]
+
+    # Passing all params -- override
+    loaded_predict = loaded_model.predict(
+        ["input"], params={"str_param": "new_string", "int_array": [4, 5, 6]}
+    )
+    assert loaded_predict == ["new_string", [4, 5, 6]]
+
+    # Raise warning for unrecognized params
+    with mock.patch("mlflow.models.utils._logger.warning") as mock_warning:
+        loaded_predict = loaded_model.predict(["input"], params={"new_param": "new_string"})
+    mock_warning.assert_called_once()
+    assert (
+        "Unrecognized params ['new_param'] are ignored for inference"
+        in mock_warning.call_args[0][0]
+    )
+    assert loaded_predict == ["string", [1, 2, 3]]
+
+
+def test_enforce_params_schema_errors():
+    # Raise error when failing to convert value to DataType.datetime
+    test_schema = ParamSchema(
+        [ParamSpec("datetime_param", DataType.datetime, np.datetime64("2023-06-06"))]
+    )
+    with pytest.raises(
+        MlflowException, match=r"Failed to convert value 1.0 from type float to DataType.datetime"
+    ):
+        _enforce_params_schema({"datetime_param": 1.0}, test_schema)
+    # With array
+    test_schema = ParamSchema(
+        [
+            ParamSpec(
+                "datetime_array",
+                DataType.datetime,
+                np.array([np.datetime64("2023-06-06"), np.datetime64("2023-06-06")]),
+                (-1,),
+            )
+        ]
+    )
+    with pytest.raises(
+        MlflowException, match=r"Failed to convert value 1.0 from type float to DataType.datetime"
+    ):
+        _enforce_params_schema({"datetime_array": [1.0, 2.0]}, test_schema)
+
+    # Raise error when failing to convert value to DataType.float
+    test_schema = ParamSchema([ParamSpec("float_param", DataType.float, np.float32(1))])
+    with pytest.raises(MlflowException, match=r"Incompatible types for param 'float_param'"):
+        _enforce_params_schema({"float_param": "a"}, test_schema)
+    # With array
+    test_schema = ParamSchema(
+        [ParamSpec("float_array", DataType.float, np.array([np.float32(1), np.float32(2)]), (-1,))]
+    )
+    with pytest.raises(MlflowException, match=r"Incompatible types for param 'float_array'"):
+        _enforce_params_schema(
+            {"float_array": [np.float32(1), np.float32(2), np.float64(3)]}, test_schema
+        )
+
+    # Raise error for any other conversions
+    error_msg = r"Incompatible types for param 'int_param'"
+    test_schema = ParamSchema([ParamSpec("int_param", DataType.long, np.int32(1))])
+    with pytest.raises(MlflowException, match=error_msg):
+        _enforce_params_schema({"int_param": np.float32(1)}, test_schema)
+    with pytest.raises(MlflowException, match=error_msg):
+        _enforce_params_schema({"int_param": "1"},
test_schema) + with pytest.raises(MlflowException, match=error_msg): + _enforce_params_schema({"int_param": np.datetime64("2023-06-06")}, test_schema) + + error_msg = r"Incompatible types for param 'str_param'" + test_schema = ParamSchema([ParamSpec("str_param", DataType.string, "1")]) + with pytest.raises(MlflowException, match=error_msg): + _enforce_params_schema({"str_param": np.float32(1)}, test_schema) + with pytest.raises(MlflowException, match=error_msg): + _enforce_params_schema({"str_param": b"string"}, test_schema) + with pytest.raises(MlflowException, match=error_msg): + _enforce_params_schema({"str_param": np.datetime64("2023-06-06")}, test_schema) + + # Raise error if parameters is not dictionary + with pytest.raises(MlflowException, match=r"Parameters must be a dictionary. Got type 'int'."): + _enforce_params_schema(100, test_schema) + + # Raise error if invalid parameters are passed + test_parameters = {"a": True, "b": (1, 2), "c": b"test"} + test_schema = ParamSchema( + [ + ParamSpec("a", DataType.boolean, False), + ParamSpec("b", DataType.string, [], (-1,)), + ParamSpec("c", DataType.string, ""), + ] + ) + with pytest.raises( + MlflowException, + match=re.escape( + "Value must be a 1D array with shape (-1,) for param 'b': string " + "(default: []) (shape: (-1,)), received tuple" + ), + ): + _enforce_params_schema(test_parameters, test_schema) + # Raise error for non-1D array + with pytest.raises(MlflowException, match=r"received list with ndim 2"): + _enforce_params_schema( + {"a": [[1, 2], [3, 4]]}, ParamSchema([ParamSpec("a", DataType.long, [], (-1,))]) + ) + + +def test_enforce_params_schema_errors_with_model_without_params(): + class MyModel(mlflow.pyfunc.PythonModel): + def predict(self, ctx, model_input, params=None): + return list(params.values()) if isinstance(params, dict) else None + + params = {"str_param": "string", "int_array": [1, 2, 3], "123": 123} + signature = infer_signature(["input"]) + + with mlflow.start_run(): + model_info = mlflow.pyfunc.log_model( + python_model=MyModel(), artifact_path="model1", signature=signature + ) + + loaded_model = mlflow.pyfunc.load_model(model_info.model_uri) + + with pytest.raises( + MlflowException, + match=r"`params` can only be specified at inference time if the model signature", + ): + loaded_model.predict(["input"], params=params) + + +def test_enforce_params_schema_errors_with_model_with_params(): + class MyModel(mlflow.pyfunc.PythonModel): + def predict(self, ctx, model_input, params=None): + return list(params.values()) if isinstance(params, dict) else None + + params = {"str_param": "string", "int_array": [1, 2, 3], "123": 123} + signature = infer_signature(["input"], params=params) + + with mlflow.start_run(): + model_info = mlflow.pyfunc.log_model( + python_model=MyModel(), artifact_path="model2", signature=signature + ) + + loaded_model_with_params = mlflow.pyfunc.load_model(model_info.model_uri) + with pytest.raises(MlflowException, match=r"Parameters must be a dictionary. Got type 'list'"): + loaded_model_with_params.predict(["input"], params=[1, 2, 3]) + + with mock.patch("mlflow.models.utils._logger.warning") as mock_warning: + loaded_model_with_params.predict(["input"], params={123: 456}) + mock_warning.assert_called_with( + "Keys in parameters should be of type `str`, but received non-string keys." + "Converting all keys to string..." 
+ ) + + +def test_param_spec_with_success(): + # Normal cases + assert ParamSpec("a", DataType.long, 1).default == 1 + assert ParamSpec("a", DataType.string, "1").default == "1" + assert ParamSpec("a", DataType.boolean, True).default is True + assert ParamSpec("a", DataType.double, 1.0).default == 1.0 + assert ParamSpec("a", DataType.float, np.float32(1)).default == 1 + assert ParamSpec("a", DataType.datetime, np.datetime64("2023-06-06")).default == datetime.date( + 2023, 6, 6 + ) + assert ParamSpec( + "a", DataType.datetime, np.datetime64("2023-06-06 00:00:00") + ).default == datetime.datetime(2023, 6, 6, 0, 0, 0) + assert ParamSpec("a", DataType.integer, np.int32(1)).default == 1 + + # Convert default value type if it is not consistent with provided type + # 1. int -> long, float, double + assert ParamSpec("a", DataType.long, np.int32(1)).default == 1 + assert ParamSpec("a", DataType.float, np.int32(1)).default == 1.0 + assert ParamSpec("a", DataType.double, np.int32(1)).default == 1.0 + # 2. long -> float, double + assert ParamSpec("a", DataType.float, 1).default == 1.0 + assert ParamSpec("a", DataType.double, 1).default == 1.0 + # 3. float -> double + assert ParamSpec("a", DataType.double, np.float32(1)).default == 1.0 + # 4. any -> datetime (try conversion) + assert ParamSpec("a", DataType.datetime, "2023-07-01 00:00:00").default == np.datetime64( + "2023-07-01 00:00:00" + ) + + +def test_param_spec_errors(): + # Raise error if default value can not be converted to specified type + with pytest.raises(MlflowException, match=r"Incompatible types for param 'a'"): + ParamSpec("a", DataType.integer, "1.0") + with pytest.raises(MlflowException, match=r"Incompatible types for param 'a'"): + ParamSpec("a", DataType.integer, [1.0, 2.0], (-1,)) + with pytest.raises(MlflowException, match=r"Incompatible types for param 'a'"): + ParamSpec("a", DataType.string, True) + with pytest.raises(MlflowException, match=r"Incompatible types for param 'a'"): + ParamSpec("a", DataType.string, [1.0, 2.0], (-1,)) + with pytest.raises(MlflowException, match=r"Binary type is not supported for parameters"): + ParamSpec("a", DataType.binary, 1.0) + with pytest.raises(MlflowException, match=r"Failed to convert value"): + ParamSpec("a", DataType.datetime, 1.0) + with pytest.raises(MlflowException, match=r"Failed to convert value"): + ParamSpec("a", DataType.datetime, [1.0, 2.0], (-1,)) + with pytest.raises(MlflowException, match=r"Invalid value for param 'a'"): + ParamSpec("a", DataType.datetime, np.datetime64("20230606")) + + # Raise error if shape is not specified for list value + with pytest.raises( + MlflowException, + match=re.escape( + "Value should be a scalar for param 'a': long (default: [1, 2, 3]) with shape None" + ), + ): + ParamSpec("a", DataType.long, [1, 2, 3], shape=None) + with pytest.raises( + MlflowException, + match=re.escape( + "Value should be a scalar for param 'a': integer (default: [1 2 3]) with shape None" + ), + ): + ParamSpec("a", DataType.integer, np.array([1, 2, 3]), shape=None) + + # Raise error if shape is specified for scalar value + with pytest.raises( + MlflowException, + match=re.escape( + "Value must be a 1D array with shape (-1,) for param 'a': boolean (default: True) " + "(shape: (-1,)), received bool" + ), + ): + ParamSpec("a", DataType.boolean, True, shape=(-1,)) + + # Raise error if shape specified is not allowed + with pytest.raises( + MlflowException, match=r"Shape must be None for scalar value or \(-1,\) for 1D array value" + ): + ParamSpec("a", DataType.boolean, [True, 
False], (2,)) + + # Raise error if default value is not scalar or 1D array + with pytest.raises( + MlflowException, + match=re.escape( + "Value must be a 1D array with shape (-1,) for param 'a': boolean (default: {'a': 1}) " + "(shape: (-1,)), received dict" + ), + ): + ParamSpec("a", DataType.boolean, {"a": 1}, (-1,)) + + +def test_enforce_schema_in_python_model_predict(sample_params_basic, param_schema_basic): + test_params = sample_params_basic + test_schema = param_schema_basic + signature = infer_signature(["input1"], params=test_params) + with mlflow.start_run(): + model_info = mlflow.pyfunc.log_model( + python_model=PythonModelWithBasicParams(), + artifact_path="test_model", + signature=signature, + ) + assert signature.params == test_schema + + loaded_model = mlflow.pyfunc.load_model(model_info.model_uri) + loaded_predict = loaded_model.predict(["a", "b"], params=test_params) + for param, value in test_params.items(): + if param == "double_array": + assert (loaded_predict[param] == value).all() + else: + assert loaded_predict[param] == value + + # Automatically convert type if it's not consistent with schema + # 1. int -> long, float, double + params_int = { + "double_param": np.int32(1), + "float_param": np.int32(1), + "long_param": np.int32(1), + } + expected_params_int = { + "double_param": 1.0, + "float_param": np.float32(1), + "long_param": 1, + } + loaded_predict = loaded_model.predict(["a", "b"], params=params_int) + for param in params_int: + assert loaded_predict[param] == expected_params_int[param] + + # 2. long -> float, double + params_long = { + "double_param": 1, + "float_param": 1, + } + expected_params_long = { + "double_param": 1.0, + "float_param": np.float32(1), + } + loaded_predict = loaded_model.predict(["a", "b"], params=params_long) + for param in params_long: + assert loaded_predict[param] == expected_params_long[param] + + # 3. float -> double + assert ( + loaded_model.predict( + ["a", "b"], + params={ + "double_param": np.float32(1), + }, + )["double_param"] + == 1.0 + ) + + # 4. 
any -> datetime (try conversion) + assert loaded_model.predict( + ["a", "b"], + params={ + "datetime_param": "2023-06-26 00:00:00", + }, + )[ + "datetime_param" + ] == np.datetime64("2023-06-26 00:00:00") + + +def test_enforce_schema_in_python_model_serving(sample_params_basic): + signature = infer_signature(["input1"], params=sample_params_basic) + with mlflow.start_run(): + model_info = mlflow.pyfunc.log_model( + python_model=PythonModelWithBasicParams(), + artifact_path="test_model", + signature=signature, + ) + + # params in payload should be json serializable + test_params = { + "str_param": "str_a", + "int_param": 1, + "bool_param": True, + "double_param": 1.0, + "float_param": 0.1, + "long_param": 100, + "datetime_param": datetime.datetime(2023, 6, 6, 0, 0, 0), + "str_list": ["a", "b", "c"], + "bool_list": [True, False], + "double_array": np.array([1.0, 2.0]), + } + response = pyfunc_serve_and_score_model( + model_info.model_uri, + data=dump_input_data(["a", "b"], params=test_params), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + assert response.status_code == 200 + prediction = json.loads(response.content.decode("utf-8"))["predictions"] + for param, value in test_params.items(): + if param == "double_array": + assert (prediction[param] == value).all() + elif param == "datetime_param": + assert prediction[param] == value.isoformat() + else: + assert prediction[param] == value + + # Test invalid params for model serving + with pytest.raises(TypeError, match=r"Object of type int32 is not JSON serializable"): + dump_input_data(["a", "b"], params={"int_param": np.int32(1)}) + + response = pyfunc_serve_and_score_model( + model_info.model_uri, + data=dump_input_data(["a", "b"], params={"double_param": "invalid"}), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + assert response.status_code == 400 + assert ( + "Incompatible types for param 'double_param'" + in json.loads(response.content.decode("utf-8"))["message"] + ) + + # Can not pass bytes to request + with pytest.raises(TypeError, match=r"Object of type bytes is not JSON serializable"): + pyfunc_serve_and_score_model( + model_info.model_uri, + data=dump_input_data(["a", "b"], params={"str_param": b"bytes"}), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + + +def test_python_model_serving_compatible(tmp_path): + """ + # Code for logging the model in mlflow 2.4.0 + import mlflow + from mlflow.models import infer_signature + + class MyModel(mlflow.pyfunc.PythonModel): + def predict(self, ctx, model_input): + return model_input + + with mlflow.start_run(): + model_info = mlflow.pyfunc.log_model( + python_model = MyModel(), + artifact_path = "test_model", + signature = infer_signature(["input"]), + registered_model_name="model") + """ + tmp_path.joinpath("MLmodel").write_text( + """ +artifact_path: test_model +flavors: + python_function: + cloudpickle_version: 2.2.1 + env: + conda: conda.yaml + virtualenv: python_env.yaml + loader_module: mlflow.pyfunc.model + python_model: python_model.pkl + python_version: 3.8.16 +mlflow_version: 2.4.0 +model_uuid: 3cbde93be0114644a6ec900c64cab39d +run_id: 3f87fdff03524c19908c3a47fb99f9cd +signature: + inputs: '[{"type": "string"}]' + outputs: null +utc_time_created: '2023-07-13 01:29:55.467561' + """ + ) + tmp_path.joinpath("python_env.yaml").write_text( + """ +python: 3.8.16 +build_dependencies: + - pip==23.1.2 + - setuptools==56.0.0 + - 
wheel==0.40.0 +dependencies: + - -r requirements.txt + """ + ) + tmp_path.joinpath("requirements.txt").write_text( + """ +mlflow==2.4.0 +cloudpickle==2.2.1 + """ + ) + + class MyModel(mlflow.pyfunc.PythonModel): + def predict(self, ctx, model_input): + return model_input + + python_model = MyModel() + + with open(tmp_path / "python_model.pkl", "wb") as out: + cloudpickle.dump(python_model, out) + + assert Version(mlflow.__version__) > Version("2.4.0") + model_uri = str(tmp_path) + pyfunc_loaded = mlflow.pyfunc.load_model(model_uri) + + assert pyfunc_loaded.metadata.signature == ModelSignature(Schema([ColSpec("string")])) + + # predict is compatible + local_predict = pyfunc_loaded.predict(["input"]) + assert local_predict.values[0].tolist() == ["input"] + + # model serving is compatible + response = pyfunc_serve_and_score_model( + model_uri, + data=dump_input_data(["a", "b"]), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + assert response.status_code == 200 + prediction = json.loads(response.content.decode("utf-8"))["predictions"] + assert prediction == [{"0": "a"}, {"0": "b"}] + + +def test_function_python_model_serving_compatible(tmp_path): + """ + # Code for logging the model in mlflow 2.4.0 + import mlflow + from mlflow.models import infer_signature + + def my_model(model_input): + return model_input + + with mlflow.start_run(): + model_info = mlflow.pyfunc.log_model( + python_model = my_model, + artifact_path = "test_model", + signature = infer_signature(["input"]), + registered_model_name="model", + input_example=["input"]) + """ + tmp_path.joinpath("MLmodel").write_text( + """ +artifact_path: test_model +flavors: + python_function: + cloudpickle_version: 2.2.1 + env: + conda: conda.yaml + virtualenv: python_env.yaml + loader_module: mlflow.pyfunc.model + python_model: python_model.pkl + python_version: 3.8.16 +mlflow_version: 2.4.0 +model_uuid: f19b9a51a34a453282e53ca41d384964 +run_id: 9fd7b6e125a547fdbb4505f15e8259ed +saved_input_example_info: + artifact_path: input_example.json + pandas_orient: split + type: dataframe +signature: + inputs: '[{"type": "string"}]' + outputs: null +utc_time_created: '2023-07-14 10:18:44.353510' + """ + ) + tmp_path.joinpath("python_env.yaml").write_text( + """ +python: 3.8.16 +build_dependencies: + - pip==23.1.2 + - setuptools==56.0.0 + - wheel==0.40.0 +dependencies: + - -r requirements.txt + """ + ) + tmp_path.joinpath("requirements.txt").write_text( + """ +mlflow==2.4.0 +cloudpickle==2.2.1 +pandas==2.0.3 + """ + ) + tmp_path.joinpath("input_example.json").write_text( + """ +{"data": [["input"]]} + """ + ) + + def my_model(model_input): + return model_input + + from mlflow.pyfunc.model import _FunctionPythonModel + + python_model = _FunctionPythonModel(my_model, signature=infer_signature(["input"])) + + with open(tmp_path / "python_model.pkl", "wb") as out: + cloudpickle.dump(python_model, out) + + assert Version(mlflow.__version__) > Version("2.4.0") + model_uri = str(tmp_path) + pyfunc_loaded = mlflow.pyfunc.load_model(model_uri) + + assert pyfunc_loaded.metadata.signature == ModelSignature(Schema([ColSpec("string")])) + + # predict is compatible + local_predict = pyfunc_loaded.predict(["input"]) + assert local_predict.values[0].tolist() == ["input"] + + # model serving is compatible + response = pyfunc_serve_and_score_model( + model_uri, + data=dump_input_data(["a", "b"]), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + assert 
response.status_code == 200 + prediction = json.loads(response.content.decode("utf-8"))["predictions"] + assert prediction == [{"0": "a"}, {"0": "b"}] + + +def test_enforce_schema_with_arrays_in_python_model_predict(sample_params_with_arrays): + params = sample_params_with_arrays + signature = infer_signature(["input1"], params=params) + with mlflow.start_run(): + model_info = mlflow.pyfunc.log_model( + python_model=PythonModelWithArrayParams(), + artifact_path="test_model", + signature=signature, + ) + + loaded_model = mlflow.pyfunc.load_model(model_info.model_uri) + loaded_predict = loaded_model.predict(["a", "b"], params=params) + for param, value in params.items(): + assert (loaded_predict[param] == value).all() + + # Automatically convert type if it's not consistent with schema + # 1. int -> long, float, double + for param in ["double_array", "float_array", "long_array"]: + loaded_predict = loaded_model.predict( + ["a", "b"], params={param: np.array([np.int32(1), np.int32(2)])} + ) + assert (loaded_predict[param] == params[param]).all() + # 2. long -> float, double + for param in ["double_array", "float_array"]: + loaded_predict = loaded_model.predict(["a", "b"], params={param: np.array([1, 2])}) + assert (loaded_predict[param] == params[param]).all() + # 3. float -> double + loaded_predict = loaded_model.predict( + ["a", "b"], params={"double_array": np.array([np.float32(1), np.float32(2)])} + ) + assert (loaded_predict["double_array"] == params["double_array"]).all() + # 4. any -> datetime (try conversion) + loaded_predict = loaded_model.predict( + ["a", "b"], + params={"datetime_array": np.array(["2023-06-26 00:00:00", "2023-06-26 00:00:00"])}, + ) + assert (loaded_predict["datetime_array"] == params["datetime_array"]).all() + + # Raise error if failing to convert the type + with pytest.raises( + MlflowException, match=r"Failed to convert value 1.0 from type float to DataType.datetime" + ): + loaded_model.predict(["a", "b"], params={"datetime_array": [1.0, 2.0]}) + with pytest.raises(MlflowException, match=r"Incompatible types for param 'int_array'"): + loaded_model.predict(["a", "b"], params={"int_array": np.array([1.0, 2.0])}) + with pytest.raises(MlflowException, match=r"Incompatible types for param 'float_array'"): + loaded_model.predict(["a", "b"], params={"float_array": [True, False]}) + with pytest.raises(MlflowException, match=r"Incompatible types for param 'double_array'"): + loaded_model.predict(["a", "b"], params={"double_array": [1.0, "2.0"]}) + + +def test_enforce_schema_with_arrays_in_python_model_serving(sample_params_with_arrays): + params = sample_params_with_arrays + signature = infer_signature(["input1"], params=params) + with mlflow.start_run(): + model_info = mlflow.pyfunc.log_model( + python_model=PythonModelWithArrayParams(), + artifact_path="test_model", + signature=signature, + ) + + response = pyfunc_serve_and_score_model( + model_info.model_uri, + data=dump_input_data(["a", "b"], params=params), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + assert response.status_code == 200 + prediction = json.loads(response.content.decode("utf-8"))["predictions"] + for param, value in params.items(): + if param == "datetime_array": + assert prediction[param] == list(map(np.datetime_as_string, value)) + else: + assert (prediction[param] == value).all() + + # Test invalid params for model serving + response = pyfunc_serve_and_score_model( + model_info.model_uri, + data=dump_input_data(["a", "b"], 
params={"datetime_array": [1.0, 2.0]}), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + assert response.status_code == 400 + assert ( + "Failed to convert value 1.0 from type float to DataType.datetime" + in json.loads(response.content.decode("utf-8"))["message"] + ) + + response = pyfunc_serve_and_score_model( + model_info.model_uri, + data=dump_input_data(["a", "b"], params={"int_array": np.array([1.0, 2.0])}), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + assert response.status_code == 400 + assert ( + "Incompatible types for param 'int_array'" + in json.loads(response.content.decode("utf-8"))["message"] + ) + + response = pyfunc_serve_and_score_model( + model_info.model_uri, + data=dump_input_data(["a", "b"], params={"float_array": [True, False]}), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + assert response.status_code == 400 + assert ( + "Incompatible types for param 'float_array'" + in json.loads(response.content.decode("utf-8"))["message"] + ) + + response = pyfunc_serve_and_score_model( + model_info.model_uri, + data=dump_input_data(["a", "b"], params={"double_array": [1.0, "2.0"]}), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + assert response.status_code == 400 + assert ( + "Incompatible types for param 'double_array'" + in json.loads(response.content.decode("utf-8"))["message"] + ) diff --git a/tests/pyfunc/test_scoring_server.py b/tests/pyfunc/test_scoring_server.py index ee6c3c0919c9c0..b103f61676f3e4 100644 --- a/tests/pyfunc/test_scoring_server.py +++ b/tests/pyfunc/test_scoring_server.py @@ -20,7 +20,7 @@ from mlflow.protos.databricks_pb2 import ErrorCode, BAD_REQUEST from mlflow.pyfunc import PythonModel from mlflow.pyfunc.scoring_server import get_cmd -from mlflow.types import Schema, ColSpec, DataType +from mlflow.types import Schema, ColSpec, DataType, ParamSchema, ParamSpec from mlflow.utils.file_utils import TempDir from mlflow.utils.proto_json_utils import NumpyEncoder from mlflow.utils import env_manager as _EnvManager @@ -396,7 +396,8 @@ def test_parse_json_input_records_oriented(): } p1 = pd.DataFrame.from_dict(data) records_content = json.dumps({"dataframe_records": p1.to_dict(orient="records")}) - p2 = pyfunc_scoring_server.infer_and_parse_json_input(records_content) + records_content, _ = pyfunc_scoring_server._split_data_and_params(records_content) + p2 = pyfunc_scoring_server.infer_and_parse_data(records_content) # "records" orient may shuffle column ordering. 
Hence comparing each column Series for col in data: assert all(p1[col] == p2[col]) @@ -411,7 +412,8 @@ def test_parse_json_input_split_oriented(): } p1 = pd.DataFrame.from_dict(data) split_content = json.dumps({"dataframe_split": p1.to_dict(orient="split")}) - p2 = pyfunc_scoring_server.infer_and_parse_json_input(split_content) + split_content, _ = pyfunc_scoring_server._split_data_and_params(split_content) + p2 = pyfunc_scoring_server.infer_and_parse_data(split_content) assert all(p1 == p2) @@ -426,7 +428,8 @@ def test_records_oriented_json_to_df(): ] } """ - df = pyfunc_scoring_server.infer_and_parse_json_input(jstr) + jstr, _ = pyfunc_scoring_server._split_data_and_params(jstr) + df = pyfunc_scoring_server.infer_and_parse_data(jstr) assert set(df.columns) == {"zip", "cost", "score"} assert {str(dt) for dt in df.dtypes} == {"object", "float64", "int64"} @@ -448,7 +451,8 @@ def test_split_oriented_json_to_df(): } } """ - df = pyfunc_scoring_server.infer_and_parse_json_input(jstr) + jstr, _ = pyfunc_scoring_server._split_data_and_params(jstr) + df = pyfunc_scoring_server.infer_and_parse_data(jstr) assert set(df.columns) == {"zip", "cost", "count"} assert {str(dt) for dt in df.dtypes} == {"object", "float64", "int64"} @@ -466,9 +470,11 @@ def test_parse_with_schema(pandas_df_with_all_types): schema = Schema([ColSpec(c, c) for c in pandas_df_with_all_types.columns]) df = _shuffle_pdf(pandas_df_with_all_types) json_str = json.dumps({"dataframe_split": df.to_dict(orient="split")}, cls=NumpyEncoder) - df = pyfunc_scoring_server.infer_and_parse_json_input(json_str, schema=schema) + json_str, _ = pyfunc_scoring_server._split_data_and_params(json_str) + df = pyfunc_scoring_server.infer_and_parse_data(json_str, schema=schema) json_str = json.dumps({"dataframe_records": df.to_dict(orient="records")}, cls=NumpyEncoder) - df = pyfunc_scoring_server.infer_and_parse_json_input(json_str, schema=schema) + json_str, _ = pyfunc_scoring_server._split_data_and_params(json_str) + df = pyfunc_scoring_server.infer_and_parse_data(json_str, schema=schema) assert schema == infer_signature(df[schema.input_names()]).inputs # The current behavior with pandas json parse with type hints is weird. 
In some cases, the @@ -494,7 +500,8 @@ def test_parse_with_schema(pandas_df_with_all_types): ColSpec("boolean", "bad_boolean"), ] ) - df = pyfunc_scoring_server.infer_and_parse_json_input(bad_df, schema=schema) + bad_df, _ = pyfunc_scoring_server._split_data_and_params(bad_df) + df = pyfunc_scoring_server.infer_and_parse_data(bad_df, schema=schema) # Unfortunately, the current behavior of pandas parse is to force numbers to int32 even if # they don't fit: assert df["bad_integer"].dtype == np.int32 @@ -513,7 +520,7 @@ def test_parse_with_schema(pandas_df_with_all_types): def test_serving_model_with_schema(pandas_df_with_all_types): class TestModel(PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return [[k, str(v)] for k, v in model_input.dtypes.items()] schema = Schema([ColSpec(c, c) for c in pandas_df_with_all_types.columns]) @@ -557,6 +564,54 @@ def predict(self, context, model_input): assert response_json == [[k, str(v)] for k, v in expected_types.items()] +def test_serving_model_with_param_schema(sklearn_model, model_path): + dataframe = { + "dataframe_split": pd.DataFrame(sklearn_model.inference_data).to_dict(orient="split") + } + signature = infer_signature(sklearn_model.inference_data) + param_schema = ParamSchema( + [ParamSpec("param1", DataType.datetime, np.datetime64("2023-07-01"))] + ) + signature.params = param_schema + mlflow.sklearn.save_model(sk_model=sklearn_model.model, path=model_path, signature=signature) + + # Success if passing no parameters + response = pyfunc_serve_and_score_model( + model_uri=os.path.abspath(model_path), + data=json.dumps(dataframe), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON + "; charset=UTF-8", + extra_args=["--env-manager", "local"], + ) + expect_status_code(response, 200) + + # Raise error if invalid value is passed + payload = dataframe.copy() + payload.update({"params": {"param1": "invalid_value1"}}) + response = pyfunc_serve_and_score_model( + model_uri=os.path.abspath(model_path), + data=json.dumps(payload), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON + "; charset=UTF-8", + extra_args=["--env-manager", "local"], + ) + expect_status_code(response, 400) + assert ( + "Failed to convert value invalid_value1 from type str to " + "DataType.datetime for param 'param1'" + in json.loads(response.content.decode("utf-8"))["message"] + ) + + # Ignore parameters specified in payload if it is not defined in ParamSchema + payload = dataframe.copy() + payload.update({"params": {"invalid_param": "value"}}) + response = pyfunc_serve_and_score_model( + model_uri=os.path.abspath(model_path), + data=json.dumps(payload), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON + "; charset=UTF-8", + extra_args=["--env-manager", "local"], + ) + expect_status_code(response, 200) + + def test_get_jsonnable_obj(): from mlflow.pyfunc.scoring_server import _get_jsonable_obj @@ -569,7 +624,7 @@ def test_get_jsonnable_obj(): def test_parse_json_input_including_path(): class TestModel(PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return 1 with mlflow.start_run() as run: diff --git a/tests/pyfunc/test_spark.py b/tests/pyfunc/test_spark.py index 55874805e0b4c8..97bec60db03be1 100644 --- a/tests/pyfunc/test_spark.py +++ b/tests/pyfunc/test_spark.py @@ -1,3 +1,4 @@ +import cloudpickle import datetime import os import random @@ -24,6 +25,7 @@ BooleanType, StructType, StructField, + TimestampType, ) from pyspark.sql.utils import 
AnalysisException from sklearn import datasets @@ -201,7 +203,7 @@ class EnvRestoringTestModel(mlflow.pyfunc.PythonModel): def __init__(self): pass - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): import sklearn return model_input.apply(lambda row: sklearn.__version__, axis=1) @@ -251,7 +253,7 @@ def test_spark_udf_env_manager_predict_sklearn_model(spark, sklearn_model, model def test_spark_udf_with_single_arg(spark): class TestModel(PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return [",".join(map(str, model_input.columns.tolist()))] * len(model_input) with mlflow.start_run() as run: @@ -273,7 +275,7 @@ def predict(self, context, model_input): def test_spark_udf_with_struct_return_type(spark): class TestModel(PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): input_len = len(model_input) return { "r1": [1] * input_len, @@ -545,7 +547,7 @@ def test_check_spark_udf_return_type(type_str, expected): def test_spark_udf_autofills_no_arguments(spark): class TestModel(PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return [model_input.columns] * len(model_input) signature = ModelSignature( @@ -648,7 +650,7 @@ def predict(self, context, model_input): def test_spark_udf_autofills_column_names_with_schema(spark): class TestModel(PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return [model_input.columns] * len(model_input) signature = ModelSignature( @@ -685,7 +687,7 @@ def predict(self, context, model_input): def test_spark_udf_with_datetime_columns(spark): class TestModel(PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return [model_input.columns] * len(model_input) signature = ModelSignature( @@ -711,7 +713,7 @@ def predict(self, context, model_input): def test_spark_udf_over_empty_partition(spark): class TestModel(PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): if len(model_input) == 0: raise ValueError("Empty input is not allowed.") else: @@ -926,7 +928,7 @@ def test_spark_udf_with_col_spec_type_input(spark): ) class TestModel(PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): assert model_input.to_dict() == input_pdf.to_dict() return model_input[["c_int", "c_float"]] @@ -995,7 +997,7 @@ def test_spark_udf_stdin_scoring_server(spark): ) def test_spark_udf_array_of_structs(spark): class TestModel(PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return [[("str", 0, 1, 0.0, 0.1, True)]] * len(model_input) signature = ModelSignature(inputs=Schema([ColSpec("long", "a")])) @@ -1045,3 +1047,247 @@ def predict(self, context, model_input): data1 = spark.range(3).repartition(1) result = data1.select(udf("id").alias("res")).select("res.a").toPandas() assert list(result["a"]) == [[1.0, None], None, None] + + +def test_spark_udf_with_params(spark): + class TestModel(PythonModel): + def predict(self, context, model_input, params=None): + return [[tuple(params.values())]] * len(model_input) + + test_params = { + "str_param": "str_a", + "int_param": np.int32(1), + "bool_param": True, + "double_param": 1.0, + "float_param": np.float32(0.1), + "long_param": 
100,
+    }
+
+    signature = mlflow.models.infer_signature(["input"], params=test_params)
+    spark_df = spark.createDataFrame(
+        [
+            ("input1",),
+            ("input2",),
+            ("input3",),
+        ],
+        ["input_col"],
+    )
+    with mlflow.start_run() as run:
+        mlflow.pyfunc.log_model(
+            "model",
+            python_model=TestModel(),
+            signature=signature,
+        )
+        udf = mlflow.pyfunc.spark_udf(
+            spark,
+            f"runs:/{run.info.run_id}/model",
+            result_type=ArrayType(
+                StructType(
+                    [
+                        StructField("str_param", StringType()),
+                        StructField("int_param", IntegerType()),
+                        StructField("bool_param", BooleanType()),
+                        StructField("double_param", DoubleType()),
+                        StructField("float_param", FloatType()),
+                        StructField("long_param", LongType()),
+                    ]
+                )
+            ),
+            params=test_params,
+        )
+
+        res = spark_df.withColumn("res", udf("input_col")).select("res").toPandas()
+        assert res["res"][0] == [tuple(test_params.values())]
+
+
+def test_spark_udf_with_array_params(spark):
+    class TestModel(PythonModel):
+        def predict(self, context, model_input, params=None):
+            return pd.DataFrame({k: [v] * len(model_input) for k, v in params.items()})
+
+    test_params = {
+        "str_array": np.array(["str_a", "str_b"]),
+        "int_array": np.array([np.int32(1), np.int32(2)]),
+        "double_array": np.array([1.0, 2.0]),
+        "bool_array": np.array([True, False]),
+        "float_array": np.array([np.float32(1.0), np.float32(2.0)]),
+        "long_array": np.array([1, 2]),
+    }
+
+    signature = mlflow.models.infer_signature(["input"], params=test_params)
+    spark_df = spark.createDataFrame(
+        [
+            ("input1",),
+            ("input2",),
+            ("input3",),
+        ],
+        ["input_col"],
+    )
+    with mlflow.start_run() as run:
+        mlflow.pyfunc.log_model(
+            "model",
+            python_model=TestModel(),
+            signature=signature,
+        )
+        udf = mlflow.pyfunc.spark_udf(
+            spark,
+            f"runs:/{run.info.run_id}/model",
+            result_type=StructType(
+                [
+                    StructField("str_array", ArrayType(StringType())),
+                    StructField("int_array", ArrayType(IntegerType())),
+                    StructField("double_array", ArrayType(DoubleType())),
+                    StructField("bool_array", ArrayType(BooleanType())),
+                    StructField("float_array", ArrayType(FloatType())),
+                    StructField("long_array", ArrayType(LongType())),
+                ]
+            ),
+            params=test_params,
+        )
+
+        res = spark_df.withColumn("res", udf("input_col")).select("res").toPandas()
+        assert res["res"].values[0] == tuple(v.tolist() for v in test_params.values())
+
+
+def test_spark_udf_with_params_with_errors(spark):
+    # datetime params are not supported as a spark_udf result type
+    class TestModel(PythonModel):
+        def predict(self, context, model_input, params=None):
+            # Never reached in this test: spark_udf raises on the invalid result type first
+            return [list(params.values())[0]] * len(model_input)
+
+    test_params = {"datetime_param": np.datetime64("2023-06-26 00:00:00")}
+    signature = mlflow.models.infer_signature(["input"], params=test_params)
+    with mlflow.start_run() as run:
+        mlflow.pyfunc.log_model(
+            "model",
+            python_model=TestModel(),
+            signature=signature,
+        )
+
+    with pytest.raises(MlflowException, match=r"Invalid 'spark_udf' result type"):
+        mlflow.pyfunc.spark_udf(
+            spark,
+            f"runs:/{run.info.run_id}/model",
+            result_type=TimestampType(),
+            params=test_params,
+        )
+
+
+def test_spark_udf_compatible_with_mlflow_2_4_0(tmp_path, spark):
+    """
+    # Code for logging the model in mlflow 2.4.0
+    import mlflow
+
+    class TestModel(mlflow.pyfunc.PythonModel):
+        def predict(self, context, model_input):
+            return ["string"] * len(model_input)
+
+    signature = mlflow.models.infer_signature(["input"])
+    with mlflow.start_run() as run:
+        mlflow.pyfunc.log_model(
+            "model",
+            python_model=TestModel(),
+            signature=signature,
+        )
+    """
+    tmp_path.joinpath("MLmodel").write_text(
+
""" +artifact_path: model +flavors: + python_function: + cloudpickle_version: 2.2.1 + env: + conda: conda.yaml + virtualenv: python_env.yaml + loader_module: mlflow.pyfunc.model + python_model: python_model.pkl + python_version: 3.8.16 +mlflow_version: 2.4.0 +model_uuid: 067c27bc09954838ad6d6bfc89c7eeed +run_id: 054cfd4d129849f88210568366fea24b +signature: + inputs: '[{"type": "string"}]' + outputs: null +utc_time_created: '2023-07-17 10:01:42.071952' + """ + ) + tmp_path.joinpath("python_env.yaml").write_text( + """ +python: 3.8.16 +build_dependencies: + - pip==23.1.2 + - setuptools==56.0.0 + - wheel==0.40.0 +dependencies: + - -r requirements.txt + """ + ) + tmp_path.joinpath("requirements.txt").write_text( + """ +mlflow==2.4.0 +cloudpickle==2.2.1 + """ + ) + + class TestModel(PythonModel): + def predict(self, context, model_input): + return ["string"] * len(model_input) + + python_model = TestModel() + + with open(tmp_path / "python_model.pkl", "wb") as out: + cloudpickle.dump(python_model, out) + + assert Version(mlflow.__version__) > Version("2.4.0") + model_uri = str(tmp_path) + + spark_df = spark.createDataFrame( + [("input1",), ("input2",), ("input3",)], + ["input_col"], + ) + + udf = mlflow.pyfunc.spark_udf( + spark, + model_uri, + result_type=StringType(), + ) + res = spark_df.withColumn("res", udf("input_col")).select("res").toPandas() + assert res["res"][0] == ("string") + + +def test_spark_udf_with_model_serving(spark): + class TestModel(PythonModel): + def predict(self, context, model_input, params=None): + return ["string"] * len(model_input) + + test_params = { + "str_param": "str_a", + } + + signature = mlflow.models.infer_signature(["input"], params=test_params) + spark_df = spark.createDataFrame( + [ + ("input1",), + ("input2",), + ("input3",), + ], + ["input_col"], + ) + with mlflow.start_run() as run: + mlflow.pyfunc.log_model( + "model", + python_model=TestModel(), + signature=signature, + ) + + with mock.patch("mlflow.pyfunc.check_port_connectivity", return_value=False): + udf = mlflow.pyfunc.spark_udf( + spark, + f"runs:/{run.info.run_id}/model", + result_type=StringType(), + params=test_params, + env_manager="conda", + ) + + res = spark_df.withColumn("res", udf("input_col")).select("res").toPandas() + assert res["res"][0] == ("string") diff --git a/tests/pytorch/test_pytorch_model_export.py b/tests/pytorch/test_pytorch_model_export.py index f7828f152c4c9d..cbfaf2e7743093 100644 --- a/tests/pytorch/test_pytorch_model_export.py +++ b/tests/pytorch/test_pytorch_model_export.py @@ -633,7 +633,7 @@ def load_context(self, context): # pylint: disable=attribute-defined-outside-init self.pytorch_model = mlflow.pytorch.load_model(context.artifacts["pytorch_model"]) - def predict(self, _, model_input): + def predict(self, _, model_input, params=None): with torch.no_grad(): input_tensor = torch.from_numpy(model_input.values.astype(np.float32)) output_tensor = self.pytorch_model(input_tensor) diff --git a/tests/sentence_transformers/test_sentence_transformers_model_export.py b/tests/sentence_transformers/test_sentence_transformers_model_export.py index 809fb71f437f3b..54a8e5a535b53d 100644 --- a/tests/sentence_transformers/test_sentence_transformers_model_export.py +++ b/tests/sentence_transformers/test_sentence_transformers_model_export.py @@ -12,6 +12,7 @@ import mlflow.pyfunc.scoring_server as pyfunc_scoring_server import mlflow.sentence_transformers from mlflow import pyfunc +from mlflow.exceptions import MlflowException from mlflow.models import Model, infer_signature from 
mlflow.models.utils import _read_example from mlflow.store.artifact.s3_artifact_repo import S3ArtifactRepository @@ -294,6 +295,7 @@ def test_default_signature_assignment(): expected_signature = { "inputs": '[{"type": "string"}]', "outputs": '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": ' "[-1]}}]", + "params": None, } default_signature = mlflow.sentence_transformers._get_default_signature() @@ -323,15 +325,59 @@ def test_model_pyfunc_save_load(basic_model, model_path): np.testing.assert_array_equal(emb1, emb3) +def test_model_pyfunc_predict_with_params(basic_model, tmp_path): + sentence = "hello world and hello mlflow" + params = {"batch_size": 16} + + model_path = tmp_path / "model1" + signature = infer_signature(sentence, params=params) + mlflow.sentence_transformers.save_model(basic_model, model_path, signature=signature) + loaded_pyfunc = pyfunc.load_model(model_uri=model_path) + embedding_dim = basic_model.get_sentence_embedding_dimension() + + emb0 = loaded_pyfunc.predict(sentence, params) + assert emb0.shape == (1, embedding_dim) + + with pytest.raises(MlflowException, match=r"Incompatible types for param 'batch_size'"): + loaded_pyfunc.predict(sentence, {"batch_size": "16"}) + + model_path = tmp_path / "model2" + mlflow.sentence_transformers.save_model( + basic_model, + model_path, + signature=infer_signature(sentence, params={"invalid_param": "value"}), + ) + loaded_pyfunc = pyfunc.load_model(model_uri=model_path) + with pytest.raises( + MlflowException, match=r"Received invalid parameter value for `params` argument" + ): + loaded_pyfunc.predict(sentence, {"invalid_param": "random_value"}) + + model_path = tmp_path / "model3" + mlflow.sentence_transformers.save_model(basic_model, model_path) + loaded_pyfunc = pyfunc.load_model(model_uri=model_path) + with pytest.raises( + MlflowException, + match=r"`params` can only be specified at inference time if the model " + r"signature defines a params schema. 
This model does not define a params schema.", + ): + loaded_pyfunc.predict(sentence, params) + + def test_spark_udf(basic_model, spark): + params = {"batch_size": 16} with mlflow.start_run(): - model_info = mlflow.sentence_transformers.log_model(basic_model, "my_model") + signature = infer_signature(SENTENCES, basic_model.encode(SENTENCES), params) + model_info = mlflow.sentence_transformers.log_model( + basic_model, "my_model", signature=signature + ) result_type = ArrayType(DoubleType()) loaded_model = mlflow.pyfunc.spark_udf( spark, model_info.model_uri, result_type=result_type, + params=params, ) df = spark.createDataFrame([("hello MLflow",), ("bye world",)], ["text"]) diff --git a/tests/sklearn/test_sklearn_model_export.py b/tests/sklearn/test_sklearn_model_export.py index 4f564f87958dda..07aa3197577367 100644 --- a/tests/sklearn/test_sklearn_model_export.py +++ b/tests/sklearn/test_sklearn_model_export.py @@ -1,4 +1,6 @@ +from packaging.version import Version from pathlib import Path +import pickle from unittest import mock import os import pytest @@ -9,6 +11,7 @@ import numpy as np import pandas as pd +import sklearn from sklearn import datasets import sklearn.linear_model as glm import sklearn.neighbors as knn @@ -668,6 +671,107 @@ def test_pyfunc_serve_and_score(sklearn_knn_model): np.testing.assert_array_almost_equal(scores, model.predict(inference_dataframe)) +@pytest.mark.skipif( + Version(sklearn.__version__) != Version("1.2.2"), + reason="'sklearn.metrics._dist_metrics' doesn't have attribute 'EuclideanDistance'", +) +def test_sklearn_compatible_with_mlflow_2_4_0(sklearn_knn_model, tmp_path): + model, inference_dataframe = sklearn_knn_model + model_predict = model.predict(inference_dataframe) + + # save test model + tmp_path.joinpath("MLmodel").write_text( + f""" +artifact_path: model +flavors: + python_function: + env: + conda: conda.yaml + virtualenv: python_env.yaml + loader_module: mlflow.sklearn + model_path: model.pkl + predict_fn: predict + python_version: 3.8.16 + sklearn: + code: null + pickled_model: model.pkl + serialization_format: cloudpickle + sklearn_version: {sklearn.__version__} +mlflow_version: 2.4.0 +model_uuid: c9833d74b1ff4013a1c9eff05d39eeef +run_id: 8146a2ae86104f5b853351e600fc9d7b +utc_time_created: '2023-07-04 07:19:43.561797' +""" + ) + tmp_path.joinpath("python_env.yaml").write_text( + """ +python: 3.8.16 +build_dependencies: + - pip==23.1.2 + - setuptools==56.0.0 + - wheel==0.40.0 +dependencies: + - -r requirements.txt +""" + ) + tmp_path.joinpath("requirements.txt").write_text( + f""" +mlflow==2.4.0 +cloudpickle +numpy +psutil +scikit-learn=={sklearn.__version__} +scipy +""" + ) + with open(tmp_path / "model.pkl", "wb") as out: + pickle.dump(model, out, protocol=pickle.DEFAULT_PROTOCOL) + + assert Version(mlflow.__version__) > Version("2.4.0") + model_uri = str(tmp_path) + pyfunc_loaded = mlflow.pyfunc.load_model(model_uri) + + # predict is compatible + local_predict = pyfunc_loaded.predict(inference_dataframe) + np.testing.assert_array_almost_equal(local_predict, model_predict) + + # model serving is compatible + resp = pyfunc_serve_and_score_model( + model_uri, + data=pd.DataFrame(inference_dataframe), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS, + ) + scores = pd.DataFrame( + data=json.loads(resp.content.decode("utf-8"))["predictions"] + ).values.squeeze() + np.testing.assert_array_almost_equal(scores, model_predict) + + # Raise error if trying to pass params to model logged with mlflow < 
2.5.0 + with pytest.raises( + MlflowException, + match=r"`params` can only be specified at inference " + r"time if the model signature defines a params schema.", + ): + pyfunc_loaded.predict(inference_dataframe, params={"top_k": 2}) + + # Raise error if trying to pass params to model logged with mlflow < 2.5.0 for model serving + response = pyfunc_serve_and_score_model( + model_uri, + data=json.dumps( + {"dataframe_split": inference_dataframe.to_dict(orient="split"), "params": {"top_k": 2}} + ), + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + assert response.status_code == 400 + assert ( + "`params` can only be specified at inference time if the model " + "signature defines a params schema." + in json.loads(response.content.decode("utf-8"))["message"] + ) + + def test_log_model_with_code_paths(sklearn_knn_model): artifact_path = "model" with mlflow.start_run(), mock.patch( diff --git a/tests/store/model_registry/test_file_store.py b/tests/store/model_registry/test_file_store.py index 74aee64cdc93a4..5f478717fa4f58 100644 --- a/tests/store/model_registry/test_file_store.py +++ b/tests/store/model_registry/test_file_store.py @@ -1483,7 +1483,7 @@ def test_pyfunc_model_registry_with_file_store(store): from mlflow.pyfunc import PythonModel class MyModel(PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return 7 mlflow.set_registry_uri(path_to_local_file_uri(store.root_directory)) diff --git a/tests/test_cli.py b/tests/test_cli.py index aa505e24901456..e326d2f9ab605c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -393,7 +393,7 @@ def invoke_gc(*args): ) def test_mlflow_models_serve(enable_mlserver): class MyModel(pyfunc.PythonModel): - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): return np.array([1, 2, 3]) model = MyModel() diff --git a/tests/transformers/test_transformers_model_export.py b/tests/transformers/test_transformers_model_export.py index d404cc603ef5d7..8746d84909ecd8 100644 --- a/tests/transformers/test_transformers_model_export.py +++ b/tests/transformers/test_transformers_model_export.py @@ -1334,7 +1334,7 @@ def test_qa_pipeline_pyfunc_load_and_infer(small_qa_pipeline, model_path, infere ], ) def test_text2text_generation_pipeline_with_inference_configs( - text2text_generation_pipeline, model_path, data, result + text2text_generation_pipeline, tmp_path, data, result ): signature = infer_signature( data, mlflow.transformers.generate_signature_output(text2text_generation_pipeline, data) @@ -1348,13 +1348,14 @@ def test_text2text_generation_pipeline_with_inference_configs( "top_p": 0.85, "repetition_penalty": 1.15, } + model_path1 = tmp_path.joinpath("model1") mlflow.transformers.save_model( text2text_generation_pipeline, - path=model_path, + path=model_path1, inference_config=inference_config, signature=signature, ) - pyfunc_loaded = mlflow.pyfunc.load_model(model_path) + pyfunc_loaded = mlflow.pyfunc.load_model(model_path1) inference = pyfunc_loaded.predict(data) @@ -1367,6 +1368,88 @@ def test_text2text_generation_pipeline_with_inference_configs( pd_inference = pyfunc_loaded.predict(pd_input) assert pd_inference == result + model_path2 = tmp_path.joinpath("model2") + signature_with_params = infer_signature( + data, + mlflow.transformers.generate_signature_output(text2text_generation_pipeline, data), + inference_config, + ) + mlflow.transformers.save_model( + text2text_generation_pipeline, + 
path=model_path2, + signature=signature_with_params, + ) + pyfunc_loaded = mlflow.pyfunc.load_model(model_path2) + + dict_inference = pyfunc_loaded.predict( + data, + params=inference_config, + ) + + assert dict_inference == inference + + +def test_text2text_generation_pipeline_with_params(text2text_generation_pipeline, tmp_path): + data = "muppet keyboard type" + parameters = {"top_k": 2, "num_beams": 5} + generated_output = mlflow.transformers.generate_signature_output( + text2text_generation_pipeline, data + ) + signature = infer_signature( + data, + generated_output, + parameters, + ) + + model_path = tmp_path / "model1" + mlflow.transformers.save_model( + text2text_generation_pipeline, + path=model_path, + signature=signature, + ) + pyfunc_loaded = mlflow.pyfunc.load_model(model_path) + pyfunc_loaded.predict(data, parameters) + + parameters.update({"invalid_param": "invalid_param"}) + model_path = tmp_path / "model2" + mlflow.transformers.save_model( + text2text_generation_pipeline, + path=model_path, + signature=infer_signature( + data, + generated_output, + parameters, + ), + ) + pyfunc_loaded = mlflow.pyfunc.load_model(model_path) + with pytest.raises( + MlflowException, + match=r"The params provided to the `predict` method are " + r"not valid for pipeline Text2TextGenerationPipeline.", + ): + pyfunc_loaded.predict(data, parameters) + + with pytest.raises(MlflowException, match=r"Invalid parameters found"): + pyfunc_loaded.predict(data, {"top_k": "2"}) + + model_path = tmp_path / "model3" + mlflow.transformers.save_model( + text2text_generation_pipeline, + model_path, + signature=infer_signature( + data, + generated_output, + params={"invalid_param": "value"}, + ), + ) + loaded_pyfunc = pyfunc.load_model(model_uri=model_path) + with pytest.raises( + MlflowException, + match=r"The params provided to the `predict` method are not " + r"valid for pipeline Text2TextGenerationPipeline.", + ): + loaded_pyfunc.predict(data, {"invalid_param": "random_value"}) + @pytest.mark.skipif(RUNNING_IN_GITHUB_ACTIONS, reason=GITHUB_ACTIONS_SKIP_REASON) def test_text2text_generation_pipeline_with_inferred_schema(text2text_generation_pipeline): @@ -3280,6 +3363,112 @@ def test_save_model_card_with_non_utf_characters(tmp_path, model_name): assert data == card_data.data.to_dict() +def test_qa_pipeline_pyfunc_predict_with_kwargs(small_qa_pipeline): + artifact_path = "qa_model" + data = { + "question": [ + "What color is it?", + "How do the people go?", + "What does the 'wolf' howl at?", + ], + "context": [ + "Some people said it was green but I know that it's pink.", + "The people on the bus go up and down. 
Up and down.", + "The pack of 'wolves' stood on the cliff and a 'lone wolf' howled at " + "the moon for hours.", + ], + } + parameters = { + "top_k": 2, + "max_answer_len": 5, + } + inference_payload = json.dumps( + { + "inputs": data, + "params": parameters, + } + ) + signature_with_params = infer_signature( + data, + mlflow.transformers.generate_signature_output(small_qa_pipeline, data), + parameters, + ) + + with mlflow.start_run(): + mlflow.transformers.log_model( + transformers_model=small_qa_pipeline, + artifact_path=artifact_path, + signature=signature_with_params, + ) + model_uri = mlflow.get_artifact_uri(artifact_path) + + response = pyfunc_serve_and_score_model( + model_uri, + data=inference_payload, + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + values = PredictionsResponse.from_json(response.content.decode("utf-8")).get_predictions() + + assert values.to_dict(orient="records") == [ + {0: "pink"}, + {0: "pink."}, + {0: "up and down"}, + {0: "Up and down"}, + {0: "the moon"}, + {0: "moon"}, + ] + + +@pytest.mark.skipif( + Version(transformers.__version__) < Version("4.29.0"), reason="Feature does not exist" +) +@pytest.mark.skipcacheclean +def test_whisper_model_serve_and_score_with_timestamps_with_kwargs( + whisper_pipeline, raw_audio_file +): + artifact_path = "whisper_timestamps" + inference_config = { + "return_timestamps": "word", + "chunk_length_s": 20, + "stride_length_s": [5, 3], + } + signature = infer_signature( + raw_audio_file, + mlflow.transformers.generate_signature_output(whisper_pipeline, raw_audio_file), + params=inference_config, + ) + with mlflow.start_run(): + model_info = mlflow.transformers.log_model( + transformers_model=whisper_pipeline, + artifact_path=artifact_path, + signature=signature, + input_example=raw_audio_file, + ) + + inference_payload = json.dumps( + { + "inputs": [base64.b64encode(raw_audio_file).decode("ascii")], + "inference_config": inference_config, + } + ) + response = pyfunc_serve_and_score_model( + model_info.model_uri, + data=inference_payload, + content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON, + extra_args=["--env-manager", "local"], + ) + values = PredictionsResponse.from_json(response.content.decode("utf-8")).get_predictions() + payload_output = json.loads(values.loc[0, 0]) + + assert ( + payload_output["text"] + == mlflow.transformers.load_model(model_info.model_uri)(raw_audio_file, **inference_config)[ + "text" + ] + ) + + def test_uri_directory_renaming_handling_pipeline(model_path, small_seq2seq_pipeline): with mlflow.start_run(): mlflow.transformers.save_model(transformers_model=small_seq2seq_pipeline, path=model_path) diff --git a/tests/types/test_schema.py b/tests/types/test_schema.py index d28c3497f38b6b..2cd93e13a4fbb9 100644 --- a/tests/types/test_schema.py +++ b/tests/types/test_schema.py @@ -1,3 +1,4 @@ +import datetime import json import math import re @@ -10,8 +11,9 @@ from mlflow.exceptions import MlflowException from mlflow.models.utils import _enforce_tensor_spec from mlflow.types import DataType -from mlflow.types.schema import ColSpec, Schema, TensorSpec +from mlflow.types.schema import ColSpec, Schema, TensorSpec, ParamSchema, ParamSpec from mlflow.types.utils import ( + _infer_param_schema, _infer_schema, _get_tensor_shape, _validate_input_dictionary_contains_only_strings_and_lists_of_strings, @@ -668,3 +670,290 @@ def test_enforce_tensor_spec_variable_signature(): match=re.escape(r"Shape of input (2,) does not match expected shape (-1, 2, 3)."), ): 
_enforce_tensor_spec(ragged_array, standard_spec) + + +def test_datatype_type_check(): + assert DataType.is_string("string") + + assert DataType.is_integer(1) + assert DataType.is_integer(np.int32(1)) + assert not DataType.is_integer(np.int64(1)) + # Note that isinstance(True, int) returns True + assert not DataType.is_integer(True) + + assert DataType.is_long(1) + assert DataType.is_long(np.int64(1)) + assert not DataType.is_long(np.int32(1)) + + assert DataType.is_boolean(True) + assert DataType.is_boolean(np.bool_(True)) + assert not DataType.is_boolean(1) + + assert DataType.is_double(1.0) + assert DataType.is_double(np.float64(1.0)) + assert not DataType.is_double(np.float32(1.0)) + + assert DataType.is_float(1.0) + assert DataType.is_float(np.float32(1.0)) + assert not DataType.is_float(np.float64(1.0)) + + assert DataType.is_datetime(datetime.date(2023, 6, 26)) + assert DataType.is_datetime(np.datetime64("2023-06-26 00:00:00")) + assert not DataType.is_datetime("2023-06-26 00:00:00") + + +def test_param_schema_find_duplicates(): + with pytest.raises( + MlflowException, match=re.escape("Duplicated parameters found in schema: ['param1']") + ): + ParamSchema( + [ + ParamSpec("param1", DataType.string, "default1", None), + ParamSpec("param1", DataType.string, "default1", None), + ParamSpec("param2", DataType.string, "default2", None), + ] + ) + + with pytest.raises( + MlflowException, match=re.escape("Duplicated parameters found in schema: ['param1']") + ): + ParamSchema( + [ + ParamSpec("param1", DataType.string, "default1", None), + ParamSpec("param2", DataType.string, "default2", None), + ParamSpec("param1", DataType.string, "default1", None), + ] + ) + + with pytest.raises( + MlflowException, match=re.escape("Duplicated parameters found in schema: ['param3']") + ): + ParamSchema( + [ + ParamSpec("param1", DataType.string, "default1", None), + ParamSpec("param2", DataType.string, "default2", None), + ParamSpec("param3", DataType.string, "default3", None), + ParamSpec("param3", DataType.string, "default3", None), + ] + ) + + +def test_param_spec_to_and_from_dict(): + spec = ParamSpec("str_param", DataType.string, "str_a", None) + assert spec.to_dict() == { + "name": "str_param", + "dtype": "string", + "default": "str_a", + "shape": None, + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec("str_array", DataType.string, ["str_a", "str_b"], (-1,)) + assert spec.to_dict() == { + "name": "str_array", + "dtype": "string", + "default": ["str_a", "str_b"], + "shape": (-1,), + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec("int_param", DataType.integer, np.int32(1), None) + assert spec.to_dict() == { + "name": "int_param", + "dtype": "integer", + "default": 1, + "shape": None, + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec("int_array", DataType.integer, [np.int32(1), np.int32(2)], (-1,)) + assert spec.to_dict() == { + "name": "int_array", + "dtype": "integer", + "default": [1, 2], + "shape": (-1,), + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec("bool_param", DataType.boolean, True, None) + assert spec.to_dict() == { + "name": "bool_param", + "dtype": "boolean", + "default": True, + "shape": None, + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec("bool_array", DataType.boolean, [True, False], (-1,)) 
+ assert spec.to_dict() == { + "name": "bool_array", + "dtype": "boolean", + "default": [True, False], + "shape": (-1,), + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec("double_param", DataType.double, 1.0, None) + assert spec.to_dict() == { + "name": "double_param", + "dtype": "double", + "default": 1.0, + "shape": None, + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec("double_array", DataType.double, [1.0, 2.0], (-1,)) + assert spec.to_dict() == { + "name": "double_array", + "dtype": "double", + "default": [1.0, 2.0], + "shape": (-1,), + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec("float_param", DataType.float, np.float32(0.1), None) + assert spec.to_dict() == { + "name": "float_param", + "dtype": "float", + "default": float(np.float32(0.1)), + "shape": None, + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec("float_array", DataType.float, [np.float32(0.1), np.float32(0.2)], (-1,)) + assert spec.to_dict() == { + "name": "float_array", + "dtype": "float", + "default": [float(np.float32(0.1)), float(np.float32(0.2))], + "shape": (-1,), + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec("long_param", DataType.long, 100, None) + assert spec.to_dict() == { + "name": "long_param", + "dtype": "long", + "default": 100, + "shape": None, + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec("long_array", DataType.long, [100, 200], (-1,)) + assert spec.to_dict() == { + "name": "long_array", + "dtype": "long", + "default": [100, 200], + "shape": (-1,), + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec( + "datetime_param", DataType.datetime, np.datetime64("2023-06-26 00:00:00"), None + ) + assert spec.to_dict() == { + "name": "datetime_param", + "dtype": "datetime", + "default": "2023-06-26T00:00:00", + "shape": None, + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + spec = ParamSpec( + "datetime_array", + DataType.datetime, + [np.datetime64("2023-06-26 00:00:00"), np.datetime64("2023-06-27 00:00:00")], + (-1,), + ) + assert spec.to_dict() == { + "name": "datetime_array", + "dtype": "datetime", + "default": ["2023-06-26T00:00:00", "2023-06-27T00:00:00"], + "shape": (-1,), + } + assert ParamSpec.from_json_dict(**json.loads(json.dumps(spec.to_dict()))) == spec + + +def test_infer_param_schema(): + test_params = { + "str_param": "str_a", + "int_param": np.int32(1), + "bool_param": True, + "double_param": 1.0, + "float_param": np.float32(0.1), + "long_param": np.int64(100), + "datetime_param": np.datetime64("2023-06-26 00:00:00"), + "str_list": ["a", "b", "c"], + "bool_list": [True, False], + "double_array": np.array([1.0, 2.0]), + "float_array": np.array([np.float32(0.1), np.float32(0.2)]), + "long_array": np.array([np.int64(100), np.int64(200)]), + "datetime_array": np.array([datetime.date(2023, 6, 26)]), + "str_array": np.array(["a", "b", "c"]), + "bool_array": np.array([True, False]), + "int_array": np.array([np.int32(1), np.int32(2)]), + } + test_schema = ParamSchema( + [ + ParamSpec("str_param", DataType.string, "str_a", None), + ParamSpec("int_param", DataType.integer, np.int32(1), None), + ParamSpec("bool_param", DataType.boolean, True, None), + 
ParamSpec("double_param", DataType.double, 1.0, None), + ParamSpec("float_param", DataType.float, np.float32(0.1), None), + ParamSpec("long_param", DataType.long, 100, None), + ParamSpec( + "datetime_param", DataType.datetime, np.datetime64("2023-06-26 00:00:00"), None + ), + ParamSpec("str_list", DataType.string, ["a", "b", "c"], (-1,)), + ParamSpec("bool_list", DataType.boolean, [True, False], (-1,)), + ParamSpec("double_array", DataType.double, [1.0, 2.0], (-1,)), + ParamSpec("float_array", DataType.float, [np.float32(0.1), np.float32(0.2)], (-1,)), + ParamSpec("long_array", DataType.long, [100, 200], (-1,)), + ParamSpec("datetime_array", DataType.datetime, [datetime.date(2023, 6, 26)], (-1,)), + ParamSpec("str_array", DataType.string, ["a", "b", "c"], (-1,)), + ParamSpec("bool_array", DataType.boolean, [True, False], (-1,)), + ParamSpec("int_array", DataType.integer, [1, 2], (-1,)), + ] + ) + assert _infer_param_schema(test_params) == test_schema + + assert _infer_param_schema({"datetime_param": datetime.date(2023, 6, 26)}) == ParamSchema( + [ParamSpec("datetime_param", DataType.datetime, datetime.date(2023, 6, 26), None)] + ) + + # Raise error if parameters is not dictionary + with pytest.raises(MlflowException, match=r"Expected parameters to be dict, got list"): + _infer_param_schema(["a", "str_a", "b", 1]) + + # Raise error if parameter is bytes + with pytest.raises(MlflowException, match=r"Binary type is not supported for parameters"): + _infer_param_schema({"a": b"str_a"}) + + # Raise error for invalid parameters types - tuple, 2D array, dictionary + test_parameters = { + "a": "str_a", + "b": (1, 2, 3), + "c": True, + "d": [[1, 2], [3, 4]], + "e": {"a": 1, "b": 2}, + } + with pytest.raises(MlflowException) as e: # pylint: disable=pytest-raises-without-match + _infer_param_schema(test_parameters) + assert e.match(r"Failed to infer schema for parameters: ") + assert e.match( + re.escape( + "('b', (1, 2, 3), MlflowException('Expected parameters " + "to be 1D array or scalar, got tuple'))" + ) + ) + assert e.match( + re.escape( + "('d', [[1, 2], [3, 4]], MlflowException('Expected parameters " + "to be 1D array or scalar, got 2D array'))" + ) + ) + assert e.match( + re.escape( + "('e', {'a': 1, 'b': 2}, MlflowException('Expected parameters " + "to be 1D array or scalar, got dict'))" + ) + )