diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-files/docker_env/Dockerfile b/sdk/python/foundation-models/system/inference/text-generation/llama-files/docker_env/Dockerfile new file mode 100644 index 0000000000..f2ec4705e7 --- /dev/null +++ b/sdk/python/foundation-models/system/inference/text-generation/llama-files/docker_env/Dockerfile @@ -0,0 +1,18 @@ +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20230620.v1 + +ENV CONDA_ENVIRONMENT_PATH /azureml-envs/llama + +# Prepend path to AzureML conda environment +ENV PATH $CONDA_ENVIRONMENT_PATH/bin:$PATH + +# Create conda environment +COPY conda_dependencies.yaml . +RUN conda env create -p $CONDA_ENVIRONMENT_PATH -f conda_dependencies.yaml -q && \ + rm conda_dependencies.yaml && \ + conda run -p $CONDA_ENVIRONMENT_PATH pip cache purge && \ + conda clean -a -y + +RUN pip freeze + +# This is needed for mpi to locate libpython +ENV LD_LIBRARY_PATH $CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH \ No newline at end of file diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-files/docker_env/conda_dependencies.yaml b/sdk/python/foundation-models/system/inference/text-generation/llama-files/docker_env/conda_dependencies.yaml new file mode 100644 index 0000000000..5a36b038ae --- /dev/null +++ b/sdk/python/foundation-models/system/inference/text-generation/llama-files/docker_env/conda_dependencies.yaml @@ -0,0 +1,17 @@ +channels: +- conda-forge +dependencies: +- python=3.8.16 +- pip<=23.1.2 +- pip: + - mlflow<3,>=2.3 + - azureml-evaluate-mlflow==0.0.18.post1 + - cloudpickle==2.2.1 + - torch==2.0.1 + - transformers==4.30.1 + - azure-ai-contentsafety==1.0.0b1 + - azureml-inference-server-http~=0.8.0 + - xformers==0.0.20 + - aiolimiter==1.1.0 + - azure-ai-mlmonitoring==0.1.0a3 +name: mlflow-env diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-files/score/score.py 
b/sdk/python/foundation-models/system/inference/text-generation/llama-files/score/score.py new file mode 100644 index 0000000000..b007287167 --- /dev/null +++ b/sdk/python/foundation-models/system/inference/text-generation/llama-files/score/score.py @@ -0,0 +1,627 @@ +import asyncio +import json +import logging +import numpy as np +import os + +from copy import deepcopy +from concurrent.futures import ThreadPoolExecutor +from inference_schema.parameter_types.abstract_parameter_type import ( + AbstractParameterType, +) +from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType +from inference_schema.parameter_types.standard_py_parameter_type import ( + StandardPythonParameterType, +) +from inference_schema.schema_decorators import input_schema, output_schema +from mlflow.models import Model +from mlflow.pyfunc import load_model +from mlflow.pyfunc.scoring_server import _get_jsonable_obj +from azure.ai.mlmonitoring import Collector +from mlflow.types.utils import _infer_schema +from mlflow.exceptions import MlflowException +from azure.ai.contentsafety import ContentSafetyClient +from azure.core.credentials import AzureKeyCredential +from azure.ai.contentsafety.models import AnalyzeTextOptions +from aiolimiter import AsyncLimiter + + +_logger = logging.getLogger(__name__) + +# Pandas installed, may not be necessary for tensorspec based models, so don't require it all the time +pandas_installed = False +try: + import pandas as pd + from inference_schema.parameter_types.pandas_parameter_type import ( + PandasParameterType, + ) + + pandas_installed = True +except ImportError as exception: + _logger.warning("Unable to import pandas") + + +class AsyncRateLimitedOpsUtils: + # 1000 requests / 10 seconds. 
class CsChunkingUtils:
    """Split text into chunks no longer than ``chunking_n`` characters.

    Text is first split on ``delimiter``; the resulting segments (each with
    the delimiter re-appended) are packed greedily into chunks. A single
    segment longer than ``chunking_n`` is cut into fixed-size slices.
    """

    def __init__(self, chunking_n=1000, delimiter="."):
        # Separator used to break the text into segments.
        self.delimiter = delimiter
        # Maximum number of characters allowed in one chunk.
        self.chunking_n = chunking_n

    def chunkstring(self, string, length):
        """Return a generator of consecutive slices of ``string``, each at most ``length`` long."""
        return (string[i : i + length] for i in range(0, len(string), length))

    def split_by(self, input):
        """Return ``input`` as a list of non-empty chunks of at most ``chunking_n`` chars.

        Empty segments (e.g. produced by consecutive delimiters) are dropped,
        and the delimiter is appended to every remaining segment, including
        the last one. An empty ``input`` yields an empty list.
        """
        max_n = self.chunking_n
        segments = [
            part + self.delimiter for part in input.split(self.delimiter) if part
        ]
        chunks = []
        buffer = ""

        for segment in segments:
            if len(segment) > max_n:
                # A single oversized segment: flush what has been gathered so
                # far (only if there is something to flush, so no empty chunk
                # is emitted) and slice the segment into max_n-sized pieces.
                if buffer:
                    chunks.append(buffer)
                    buffer = ""
                chunks.extend(self.chunkstring(segment, max_n))
            elif len(buffer) + len(segment) <= max_n:
                buffer += segment
            else:
                chunks.append(buffer)
                buffer = segment

        if buffer:
            chunks.append(buffer)
        return chunks
def create_col_spec_sample_io(model_signature_io):
    """Build an empty, typed pandas.DataFrame from a column-based signature.

    The returned frame has no rows; its columns are the names reported by the
    signature and each column is cast to the matching entry of
    ``model_signature_io.pandas_types()``.
    """
    try:
        columns = model_signature_io.input_names()
    except AttributeError:  # MLflow < 1.24.0
        columns = model_signature_io.column_names()
    # Pair each column name with its pandas dtype, in signature order.
    schema = dict(zip(columns, model_signature_io.pandas_types()))
    return pd.DataFrame(columns=columns).astype(dtype=schema)
def get_sample_input_from_loaded_example(input_example_info, loaded_input):
    """Convert a JSON-loaded MLflow input example into a sample input object.

    Args:
        input_example_info: mapping describing the saved example; its
            ``"type"`` entry selects the conversion.
        loaded_input: the example as returned by ``json.load``.

    Returns:
        A pandas.DataFrame for type ``"dataframe"``; a numpy.ndarray, or a
        dict of name -> numpy.ndarray, for type ``"ndarray"``; otherwise
        ``loaded_input`` unchanged (a warning is logged).
    """
    example_type = input_example_info["type"]
    if example_type == "dataframe":
        # The orient is only relevant for dataframes, so it is derived here
        # rather than for every example type.
        orient = "split" if "columns" in loaded_input else "values"
        sample_input = pd.read_json(
            json.dumps(loaded_input),
            # needs open source fix
            # orient=input_example_info['pandas_orient'],
            orient=orient,
            dtype=False,
        )
    elif example_type == "ndarray":
        inputs = loaded_input["inputs"]
        if isinstance(inputs, dict):
            # Named tensors: one array per input name.
            sample_input = {
                input_name: np.asarray(input_value)
                for input_name, input_value in inputs.items()
            }
        else:
            sample_input = np.asarray(inputs)
    else:
        # currently unused, as type always comes through from MLflow _Example creation as ndarray or dataframe
        sample_input = loaded_input
        # Report the type of the example that was actually passed in, not the
        # one stored on the module-level model object.
        _logger.warning(
            'Potentially unable to handle sample model input of type "{}". The type must be one '
            "of the list detailed in the MLflow repository: "
            "https://github.com/mlflow/mlflow/blob/master/mlflow/types/utils.py#L91 "
            '"dataframe" or "ndarray" is guaranteed to work best. For more information, please see: '
            'https://aka.ms/aml-mlflow-deploy."'.format(example_type)
        )
    return sample_input
+ ) + else: + sample_input_x = previous_sample_input + + if model_signature_outputs and previous_sample_output is None: + if model_signature_outputs.is_tensor_spec(): + sample_output_x = create_tensor_spec_sample_io(model_signature_outputs) + else: + sample_output_x = create_col_spec_sample_io(model_signature_outputs) + else: + sample_output_x = previous_sample_output + return sample_input_x, sample_output_x + + +# Handle the signature information to attempt creation of a sample based on signature if no concrete +# sample was provided +model_signature = model.signature +if model_signature: + sample_input, sample_output = get_samples_from_signature( + model_signature, sample_input, sample_output + ) +else: + _logger.warning( + "No signature information provided for model. If no sample information was provided with the model " + "the deployment's swagger will not include input and output schema and typing information." + "For more information, please see: https://aka.ms/aml-mlflow-deploy." 
+ ) + + +def get_parameter_type(sample_input_ex, sample_output_ex=None): + if sample_input_ex is None: + input_param = NoSampleParameterType() + else: + try: + schema = _infer_schema(sample_input_ex) + schema_types = schema.input_types + except MlflowException: + pass + finally: + if isinstance(sample_input_ex, np.ndarray): + # Unnamed tensor input + input_param = NumpyParameterType(sample_input_ex, enforce_shape=False) + elif pandas_installed and isinstance(sample_input_ex, pd.DataFrame): + # TODO check with OSS about pd.Series + input_param = PandasParameterType( + sample_input_ex, enforce_shape=False, orient="split" + ) + # elif schema_types and isinstance(sample_input_ex, dict) and not all(stype == DataType.string for stype in schema_types) and \ + # all(isinstance(value, list) for value in sample_input_ex.values()): + # # for dictionaries where there is any non-string type, named tensor + # param_arg = {} + # for key, value in sample_input_ex.items(): + # param_arg[key] = NumpyParameterType(value, enforce_shape=False) + # input_param = StandardPythonParameterType(param_arg) + elif isinstance(sample_input_ex, dict): + # TODO keeping this around while _infer_schema doesn't work on dataframe string signatures + param_arg = {} + for key, value in sample_input_ex.items(): + param_arg[key] = NumpyParameterType(value, enforce_shape=False) + input_param = StandardPythonParameterType(param_arg) + else: + # strings, bytes, lists and dictionaries with only strings as base type + input_param = NoSampleParameterType() + + if sample_output_ex is None: + output_param = NoSampleParameterType() + else: + if isinstance(sample_output_ex, np.ndarray): + # Unnamed tensor input + output_param = NumpyParameterType(sample_output_ex, enforce_shape=False) + elif isinstance(sample_output_ex, dict): + param_arg = {} + for key, value in sample_output_ex.items(): + param_arg[key] = NumpyParameterType(value, enforce_shape=False) + output_param = StandardPythonParameterType(param_arg) + 
async def async_analyze_text_task(client, request):
    """Run one ``client.analyze_text(request)`` call under the shared limits.

    The blocking client call is executed on the shared thread pool executor
    while both the concurrency semaphore and the rate limiter of
    ``async_rate_limiter`` are held.

    Returns:
        The severity computed by ``analyze_response`` for the service response.
    """
    loop = asyncio.get_running_loop()
    executor = async_rate_limiter.get_executor()
    sem = async_rate_limiter.get_semaphore()
    # ``async with`` guarantees the semaphore permit is returned even when the
    # service call raises or the task is cancelled; a manual acquire/release
    # pair would leak the permit on those paths.
    async with sem:
        async with async_rate_limiter.get_limiter():
            response = await loop.run_in_executor(
                executor, client.analyze_text, request
            )
    return analyze_response(response)
def analyze_text(text):
    """Return the highest content-safety severity found in ``text``.

    The text is split into chunks of at most 1000 characters, each chunk is
    analyzed concurrently, and the maximum severity is returned.

    Returns:
        ``0`` when there is nothing to analyze, ``2`` when not every chunk was
        analyzed within 60 seconds, otherwise the maximum severity over all
        chunks.
    """
    # Chunk text; drop empty chunks so no empty request is sent.
    chunking_utils = CsChunkingUtils(chunking_n=1000, delimiter=".")
    split_text = [chunk for chunk in chunking_utils.split_by(text) if chunk]

    if not split_text:
        # asyncio.wait() raises ValueError on an empty set of awaitables, and
        # there is nothing to moderate anyway.
        return 0

    loop = asyncio.get_event_loop()
    # Wrap the coroutines in tasks explicitly: passing bare coroutines to
    # asyncio.wait() is deprecated.
    tasks = [
        loop.create_task(
            async_analyze_text_task(aacs_client, AnalyzeTextOptions(text=chunk))
        )
        for chunk in split_text
    ]

    done, pending = loop.run_until_complete(asyncio.wait(tasks, timeout=60))

    if pending:
        # not all task finished, assume failed; cancel the leftovers so they
        # do not linger on the event loop.
        for task in pending:
            task.cancel()
        # NOTE(review): the caller in this file only blocks severities > 2, so
        # a timeout lets the text through — confirm this is intended.
        return 2

    return max(task.result() for task in done)
def parse_model_input_from_input_data_transformers(input_data):
    """Convert the request payload into the object passed to ``model.predict``.

    A string payload is JSON-decoded when possible (non-JSON strings are kept
    as is), and a dict with an ``"input_data"`` key is unwrapped. The result
    is then mapped as follows:

    * ``is_hfv2`` models, strings, bytes and lists of strings pass through;
    * a list of dicts passes through after each dict is validated;
    * any other list becomes a numpy.ndarray;
    * a dict with ``columns``/``index``/``data`` becomes a pandas.DataFrame;
    * any other dict passes through if it validates, otherwise it becomes a
      dict of name -> numpy.ndarray;
    * everything else passes through.

    Raises:
        MlflowException: if a list of dicts contains a dict that fails
            validation.
    """
    # Format input
    if isinstance(input_data, str):
        try:
            input_data = json.loads(input_data)
        except ValueError:
            # allow non-json strings to go through (handled as str below)
            pass

    if isinstance(input_data, dict) and "input_data" in input_data:
        input_data = input_data["input_data"]

    if is_hfv2:
        return input_data

    if isinstance(input_data, (str, bytes)):
        # strings and bytes go through
        return input_data

    if isinstance(input_data, list):
        if all(isinstance(element, str) for element in input_data):
            # lists of strings go through
            return input_data
        if all(isinstance(element, dict) for element in input_data):
            # lists of dicts of [str: str | List[str]] go through
            try:
                for dict_input in input_data:
                    _validate_input_dictionary_contains_only_strings_and_lists_of_strings(
                        dict_input
                    )
            except MlflowException:
                _logger.error(
                    "Could not parse model input - passed a list of dictionaries which had entries which were not strings or lists."
                )
                # Surface the validation error itself; without this the
                # function would fail later with an unrelated
                # UnboundLocalError because no result was ever assigned.
                raise
            return input_data
        # if a list, assume the input is a numpy array
        return np.asarray(input_data)

    if isinstance(input_data, dict):
        if "columns" in input_data and "index" in input_data and "data" in input_data:
            # if the dictionary follows pandas split column format, deserialize into a pandas Dataframe
            return pd.read_json(json.dumps(input_data), orient="split", dtype=False)
        # if input is a dictionary, but is not all ndarrays and is not pandas, it must only contain strings
        try:
            _validate_input_dictionary_contains_only_strings_and_lists_of_strings(
                input_data
            )
        except MlflowException:
            # otherwise, assume input is a named tensor, and deserialize into a dict[str, numpy.ndarray]
            return {
                input_name: np.asarray(input_value)
                for input_name, input_value in input_data.items()
            }
        return input_data

    return input_data
isinstance(key, bool): + invalid_keys.append(key) + elif not isinstance(key, (str, int)): + invalid_keys.append(key) + if isinstance(value, list) and not all( + isinstance(item, (str, bytes)) for item in value + ): + invalid_values.append(key) + elif not isinstance(value, (np.ndarray, list, str, bytes)): + invalid_values.append(key) + elif isinstance(value, np.ndarray) or value_type == np.ndarray: + if not isinstance(value, value_type): + invalid_values.append(key) + if invalid_values: + from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE + + raise MlflowException( + "Invalid values in dictionary. If passing a dictionary containing strings, all " + "values must be either strings or lists of strings. If passing a dictionary containing " + "numeric values, the data must be enclosed in a numpy.ndarray. The following keys " + f"in the input dictionary are invalid: {invalid_values}", + error_code=INVALID_PARAMETER_VALUE, + ) + if invalid_keys: + raise MlflowException( + f"The dictionary keys are not all strings or indexes. Invalid keys: {invalid_keys}" + ) diff --git a/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb new file mode 100644 index 0000000000..8dc9183308 --- /dev/null +++ b/sdk/python/foundation-models/system/inference/text-generation/llama-safe-online-deployment.ipynb @@ -0,0 +1,572 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to create an Azure AI Content Safety enabled Llama online endpoint (Preview)\n", + "### This notebook will walk you through the steps to create an __Azure AI Content Safety__ enabled __Llama__ online endpoint.\n", + "### This notebook is under preview\n", + "### The steps are:\n", + "1. 
Create an __Azure AI Content Safety__ resource for moderating the request from the user and response from the __Llama__ online endpoint.\n", + "2. Create a new __Azure AI Content Safety__ enabled __Llama__ online endpoint with a custom score.py which will integrate with the __Azure AI Content Safety__ resource to moderate the response from the __Llama__ model and the request from the user, but for the custom score.py to successfully authenticate to the __Azure AI Content Safety__ resource, we have 2 options:\n", + " 1. __UAI__, recommended but more complex approach, is to create a __User Assigned Identity (UAI)__ and assign appropriate roles to the __UAI__. Then, the custom score.py can obtain the access token of the __UAI__ from the AAD server to access the Azure AI Content Safety resource.\n", + " 2. __Environment variable__, simpler but less secure approach, is to just pass the access key of the __Azure AI Content Safety__ resource to the custom score.py via environment variable, then the custom score.py can use the key directly to access the Azure AI Content Safety resource. This option is less secure than the first option: if someone in your org has access to the endpoint, he/she can get the access key from the environment variable and use it to access the Azure AI Content Safety resource.\n", + " " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. 
Prerequisites\n", + "#### 1.1 Check List:\n", + "- [x] You have created a new Python virtual environment for this notebook.\n", + "- [x] The identity you are using to execute this notebook(yourself or your VM) need to have the __Contributor__ role on the resource group where the AML Workspace your specified is located, because this notebook will create an Azure AI Content Safety resource using that identity.\n", + "- [x] Required If you choose to use the UAI approach, the identity executing this notebook (either yourself or your virtual machine) needs to have the owner role on the resource group that contains the specified AML Workspace. This is because the notebook will create a new UAI and assign the UAI some required roles to successfully create the Azure AI Content Safety enabled Llama endpoint." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.2 Assign variables for the workspace and deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The public registry name contains Llama models\n", + "registry_name = \"msft-meta-preview\"\n", + "\n", + "# Name of the Llama model to be deployed\n", + "# available_llama_models_text_generation = [\"Llama-2-7b\", \"Llama-2-13b\"]\n", + "# available_llama_models_chat_complete = [\"Llama-2-7b-chat\", \"Llama-2-13b-chat\"]\n", + "model_name = \"Llama-2-7b\"\n", + "\n", + "endpoint_name = \"llama-cs-test\" # Replace with your endpoint name\n", + "deployment_name = \"llama\" # Replace with your deployment name, lower case only!!!\n", + "sku_name = \"Standard_NC24s_v3\" # Name of the sku(instance type) Check the model-list(can be found in the parent folder(inference)) to get the most optimal sku for your model (Default: Standard_DS2_v2)\n", + "\n", + "environment_name = f\"{endpoint_name}-env\" # Replace with your environment name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.3 Install 
Dependencies(as needed)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# uncomment the following lines to install the required packages\n", + "# %pip install azure-identity==1.13.0\n", + "# %pip install azure-mgmt-cognitiveservices==13.4.0\n", + "# %pip install azure-ai-ml==1.8.0\n", + "# %pip install azure-mgmt-msi==7.0.0\n", + "# %pip install azure-mgmt-authorization==3.0.0" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.4 Get credential" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", + "\n", + "try:\n", + " credential = DefaultAzureCredential()\n", + " # Check if given credential can get token successfully.\n", + " credential.get_token(\"https://management.azure.com/.default\")\n", + "except Exception as ex:\n", + " # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work\n", + " credential = InteractiveBrowserCredential()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.5 Configure workspace " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml import MLClient\n", + "\n", + "try:\n", + " ml_client = MLClient.from_config(credential=credential)\n", + "except Exception as ex:\n", + " # enter details of your AML workspace\n", + " subscription_id = \"\"\n", + " resource_group = \"\"\n", + " workspace = \"\"\n", + "\n", + " # get a handle to the workspace\n", + " ml_client = MLClient(credential, subscription_id, resource_group, workspace)\n", + "\n", + "\n", + "subscription_id = ml_client.subscription_id\n", + "resource_group = ml_client.resource_group_name\n", + "workspace = ml_client.workspace_name\n", + "\n", + "print(f\"Connected to 
workspace {workspace}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.6 Assign variables for Azure Content Safety\n", + "Currently, Azure AI Content Safety is in a limited set of regions:\n", + "\n", + "\n", + "__NOTE__: before you choose the region to deploy the Azure AI Content Safety, please be aware that your data will be transferred to the region you choose and by selecting a region outside your current location, you may be allowing the transmission of your data to regions outside your jurisdiction. It is important to note that data protection and privacy laws may vary between jurisdictions. Before proceeding, we strongly advise you to familiarize yourself with the local laws and regulations governing data transfer and ensure that you are legally permitted to transmit your data to an overseas location for processing. By continuing with the selection of a different region, you acknowledge that you have understood and accepted any potential risks associated with such data transmission. Please proceed with caution." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient\n", + "\n", + "acs_client = CognitiveServicesManagementClient(credential, subscription_id)\n", + "\n", + "\n", + "# settings for the Azure AI Content Safety resource\n", + "# we will choose existing AACS resource if it exists, otherwise create a new one\n", + "# name of azure ai content safety resource, has to be unique\n", + "import time\n", + "\n", + "aacs_name = f\"{endpoint_name}-aacs-{str(time.time()).replace('.','')}\"\n", + "available_aacs_locations = [\"east us\", \"west europe\"]\n", + "\n", + "# create a new Cognitive Services Account\n", + "kind = \"ContentSafety\"\n", + "aacs_sku_name = \"S0\"\n", + "aacs_location = available_aacs_locations[0]\n", + "\n", + "\n", + "print(\"Available SKUs:\")\n", + "aacs_skus = acs_client.resource_skus.list()\n", + "print(\"SKU Name\\tSKU Tier\\tLocations\")\n", + "for sku in aacs_skus:\n", + " if sku.kind == \"ContentSafety\":\n", + " locations = \",\".join(sku.locations)\n", + " print(sku.name + \"\\t\" + sku.tier + \"\\t\" + locations)\n", + "\n", + "print(\n", + " f\"Choose a new Azure AI Content Safety resource in {aacs_location} with SKU {aacs_sku_name}\"\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. 
Create Azure AI Content Safety" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.mgmt.cognitiveservices.models import Account, Sku, AccountProperties\n", + "\n", + "\n", + "parameters = Account(\n", + " sku=Sku(name=aacs_sku_name),\n", + " kind=kind,\n", + " location=aacs_location,\n", + " properties=AccountProperties(\n", + " custom_sub_domain_name=aacs_name, public_network_access=\"Enabled\"\n", + " ),\n", + ")\n", + "# How many seconds to wait between checking the status of an async operation.\n", + "wait_time = 10\n", + "\n", + "\n", + "def find_acs(accounts):\n", + " return next(\n", + " x\n", + " for x in accounts\n", + " if x.kind == \"ContentSafety\"\n", + " and x.location == aacs_location\n", + " and x.sku.name == aacs_sku_name\n", + " )\n", + "\n", + "\n", + "try:\n", + " # check if AACS exists\n", + " aacs = acs_client.accounts.get(resource_group, aacs_name)\n", + " print(f\"Found existing Azure AI content safety Account {aacs.name}.\")\n", + "except:\n", + " try:\n", + " # check if there is an existing AACS resource within same resource group\n", + " aacs = find_acs(acs_client.accounts.list_by_resource_group(resource_group))\n", + " print(\n", + " f\"Found existing Azure AI content safety Account {aacs.name} in resource group {resource_group}.\"\n", + " )\n", + " except:\n", + " print(f\"Creating Azure AI content safety Account {aacs_name}.\")\n", + " acs_client.accounts.begin_create(resource_group, aacs_name, parameters).wait()\n", + " print(\"Resource created.\")\n", + " aacs = acs_client.accounts.get(resource_group, aacs_name)\n", + "\n", + "\n", + "aacs_endpoint = aacs.properties.endpoint\n", + "aacs_resource_id = aacs.id\n", + "print(f\"AACS endpoint is {aacs_endpoint}\")\n", + "print(f\"AACS ResourceId is {aacs_resource_id}\")\n", + "\n", + "aacs_access_key = acs_client.accounts.list_keys(\n", + " resource_group_name=resource_group, account_name=aacs.name\n", + ").key1" + 
] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Create Azure AI Content Safety enabled Llama online endpoint" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3.1 Check if Llama model is available in the AML registry." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "reg_client = MLClient(\n", + " credential,\n", + " subscription_id=subscription_id,\n", + " resource_group_name=resource_group,\n", + " registry_name=registry_name,\n", + ")\n", + "version_list = list(\n", + " reg_client.models.list(model_name)\n", + ") # list available versions of the model\n", + "llama_model = None\n", + "if len(version_list) == 0:\n", + " raise Exception(f\"No model named {model_name} found in registry\")\n", + "else:\n", + " model_version = version_list[0].version\n", + " llama_model = reg_client.models.get(model_name, model_version)\n", + " print(\n", + " f\"Using model name: {llama_model.name}, version: {llama_model.version}, id: {llama_model.id} for inferencing\"\n", + " )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3.2 Create Llama online endpoint\n", + "This step may take a few minutes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import ManagedOnlineEndpoint\n", + "\n", + "# Check if the endpoint already exists in the workspace\n", + "try:\n", + " endpoint = ml_client.online_endpoints.get(endpoint_name)\n", + " print(\"---Endpoint already exists---\")\n", + "except:\n", + " # Create an online endpoint if it doesn't exist\n", + "\n", + " # Define the endpoint\n", + " endpoint = ManagedOnlineEndpoint(\n", + " name=endpoint_name, description=\"Test endpoint for model\"\n", + " )\n", + "\n", + " # Trigger the endpoint creation\n", + " try:\n", + " ml_client.begin_create_or_update(endpoint).wait()\n", + " print(\"\\n---Endpoint created successfully---\\n\")\n", + " except Exception as err:\n", + " raise RuntimeError(\n", + " f\"Endpoint creation failed. Detailed Response:\\n{err}\"\n", + " ) from err" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### 3.3 Create environment for Llama endpoint\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.core.display import display, HTML\n", + "from azure.ai.ml.entities import Environment, BuildContext\n", + "\n", + "try:\n", + " env = ml_client.environments.get(environment_name, label=\"latest\")\n", + " print(\"---Environment already exists---\")\n", + "except:\n", + " print(\"---Creating environment---\")\n", + " env = Environment(\n", + " name=environment_name, build=BuildContext(path=\"./llama-files/docker_env\")\n", + " )\n", + " ml_client.environments.create_or_update(env)\n", + " env = ml_client.environments.get(environment_name, label=\"latest\")\n", + " print(\"---Please use link below to check build status---\")\n", + "\n", + "\n", + "display(\n", + " HTML(\n", + " f\"\"\"\n", + " \n", + " Click here to check env build status in AML studio\n", + " \n", + " \"\"\"\n", + " )\n", + 
")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### 3.4 Deploy Llama model\n", + "This step may take a few minutes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml.entities import (\n", + " CodeConfiguration,\n", + " OnlineRequestSettings,\n", + " ManagedOnlineDeployment,\n", + " ProbeSettings,\n", + ")\n", + "import os\n", + "\n", + "# Define the deployment\n", + "# Update the model version as necessary\n", + "deployment = ManagedOnlineDeployment(\n", + " name=deployment_name,\n", + " endpoint_name=endpoint_name,\n", + " model=llama_model.id,\n", + " instance_type=sku_name,\n", + " instance_count=1,\n", + " code_configuration=CodeConfiguration(\n", + " code=\"./llama-files/score\", scoring_script=\"score.py\"\n", + " ),\n", + " environment=env,\n", + " environment_variables={\n", + " \"MLFLOW_MODEL_FOLDER\": os.path.basename(llama_model.path),\n", + " \"CONTENT_SAFETY_ENDPOINT\": aacs_endpoint,\n", + " \"CONTENT_SAFETY_KEY\": aacs_access_key,\n", + " },\n", + " request_settings=OnlineRequestSettings(request_timeout_ms=90000),\n", + " liveness_probe=ProbeSettings(\n", + " failure_threshold=30,\n", + " success_threshold=1,\n", + " timeout=300,\n", + " period=100,\n", + " initial_delay=500,\n", + " ),\n", + " readiness_probe=ProbeSettings(\n", + " failure_threshold=30,\n", + " success_threshold=1,\n", + " timeout=300,\n", + " period=100,\n", + " initial_delay=500,\n", + " ),\n", + ")\n", + "\n", + "# Trigger the deployment creation\n", + "try:\n", + " ml_client.begin_create_or_update(deployment).wait()\n", + " print(\"\\n---Deployment created successfully---\\n\")\n", + "except Exception as err:\n", + " raise RuntimeError(\n", + " f\"Deployment creation failed. Detailed Response:\\n{err}\"\n", + " ) from err" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. 
Test the Safety Enabled Llama online endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "test_src_dir = \"./safety-llama-test\"\n", + "os.makedirs(test_src_dir, exist_ok=True)\n", + "print(f\"test script directory: {test_src_dir}\")\n", + "sample_data = os.path.join(test_src_dir, \"sample-request.json\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Choose the request from the following 2 cells based on the Llama model you have chosen" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## For text-generation models (without -chat suffix)\n", + "\n", + "import json\n", + "\n", + "with open(sample_data, \"w\") as f:\n", + " json.dump(\n", + " {\n", + " \"input_data\": {\n", + " \"input_string\": [\n", + " \"Hello\",\n", + " \"My name is John and I have a dog.\",\n", + " \"The dog was given a eutanasa injection due to their severed leg bleding profusely from deep lacarations to the lower extremities, exposing tisssue and nerve.\", # This line contains hateful message and endpoint will return empty string as response\n", + " ],\n", + " }\n", + " },\n", + " f,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## For chat-complete models (with -chat suffix)\n", + "\n", + "import json\n", + "\n", + "with open(sample_data, \"w\") as f:\n", + " json.dump(\n", + " {\n", + " \"input_data\": {\n", + " \"input_string\": [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"What is the tallest building in the world?\",\n", + " },\n", + " {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"As of 2021, the Burj Khalifa in Dubai, United Arab Emirates is the tallest building in the world, standing at a height of 828 meters (2,722 feet). It was completed in 2010 and has 163 floors.
The Burj Khalifa is not only the tallest building in the world but also holds several other records, such as the highest occupied floor, highest outdoor observation deck, elevator with the longest travel distance, and the tallest freestanding structure in the world.\",\n", + " },\n", + " {\"role\": \"user\", \"content\": \"and in Africa?\"},\n", + " {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"In Africa, the tallest building is the Carlton Centre, located in Johannesburg, South Africa. It stands at a height of 50 floors and 223 meters (730 feet). The CarltonDefault Centre was completed in 1973 and was the tallest building in Africa for many years until the construction of the Leonardo, a 55-story skyscraper in Sandton, Johannesburg, which was completed in 2019 and stands at a height of 230 meters (755 feet). Other notable tall buildings in Africa include the Ponte City Apartments in Johannesburg, the John Hancock Center in Lagos, Nigeria, and the Alpha II Building in Abidjan, Ivory Coast\",\n", + " },\n", + " {\"role\": \"user\", \"content\": \"and in Europe?\"},\n", + " ],\n", + " \"parameters\": {\"max_length\": 512},\n", + " }\n", + " },\n", + " f,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ml_client.online_endpoints.invoke(\n", + " endpoint_name=endpoint_name,\n", + " deployment_name=deployment_name,\n", + " request_file=sample_data,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/sdk/python/foundation-models/system/inference/text-generation/prepare_uai.ipynb 
b/sdk/python/foundation-models/system/inference/text-generation/prepare_uai.ipynb new file mode 100644 index 0000000000..f9059683e5 --- /dev/null +++ b/sdk/python/foundation-models/system/inference/text-generation/prepare_uai.ipynb @@ -0,0 +1,446 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prepare UAI account for Azure Content Safety enabled Llama model deployment\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Prerequisites" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.1 Assign variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uai_name = f\"llama-uai\"\n", + "\n", + "# The name of the AACS created in deploy_llama notebook\n", + "# Leaving it blank assigns the UAI access to all AACS resources in the resource group\n", + "aacs_name = \"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.2 Get credential" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential\n", + "\n", + "try:\n", + " credential = DefaultAzureCredential()\n", + " # Check if given credential can get token successfully.\n", + " credential.get_token(\"https://management.azure.com/.default\")\n", + "except Exception as ex:\n", + " # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential does not work\n", + " credential = InteractiveBrowserCredential()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.3 Configure workspace " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.ai.ml import MLClient\n", + "\n", + "try:\n", + " ml_client = MLClient.from_config(credential=credential)\n", + "except Exception as ex:\n", + " #
enter details of your AML workspace\n", + " subscription_id = \"\"\n", + " resource_group = \"\"\n", + " workspace_name = \"\"\n", + "\n", + " # get a handle to the workspace\n", + " ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n", + "\n", + "\n", + "subscription_id = ml_client.subscription_id\n", + "resource_group = ml_client.resource_group_name\n", + "workspace_name = ml_client.workspace_name\n", + "workspace_resource = ml_client.workspaces.get(workspace_name)\n", + "workspace_location = workspace_resource.location\n", + "\n", + "print(f\"Connected to workspace {workspace_name}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Create a Managed Identity for the Azure AI Content Safety enabled Llama endpoint" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### 2.1 Get a handle to the ManagedServiceIdentityClient" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.mgmt.msi import ManagedServiceIdentityClient\n", + "from azure.mgmt.msi.models import Identity\n", + "\n", + "msi_client = ManagedServiceIdentityClient(\n", + " subscription_id=subscription_id,\n", + " credential=credential,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### 2.2 Create the User Assigned Identity:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "msi_client.user_assigned_identities.create_or_update(\n", + " resource_group_name=resource_group,\n", + " resource_name=uai_name,\n", + " parameters=Identity(location=workspace_location),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### 2.3 Retrieve the identity object\n", + "We need to retrieve the identity object so that we can use it to deploy the Azure AI Content Safety enabled Llama online endpoint."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uai_identity = msi_client.user_assigned_identities.get(\n", + " resource_group_name=resource_group,\n", + " resource_name=uai_name,\n", + ")\n", + "uai_principal_id = uai_identity.principal_id\n", + "uai_client_id = uai_identity.client_id\n", + "uai_id = uai_identity.id\n", + "print(f\"UAI principal id: {uai_principal_id}\")\n", + "print(f\"UAI id: {uai_id}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2.4 Grant appropriate roles to the UAI we created above.\n", + "Note: In order to successfully run the scripts in the current step, you must have owner permission on the AACS resource and the Llama endpoint, which we created in the previous steps." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### 2.4.1 Get an AuthorizationManagementClient to list Role Definitions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.mgmt.authorization import AuthorizationManagementClient\n", + "from azure.mgmt.authorization.v2022_04_01.models import (\n", + " RoleAssignmentCreateParameters,\n", + ")\n", + "import uuid\n", + "\n", + "role_definition_client = AuthorizationManagementClient(\n", + " credential=credential,\n", + " subscription_id=subscription_id,\n", + " api_version=\"2022-04-01\",\n", + ")\n", + "role_assignment_client = AuthorizationManagementClient(\n", + " credential=credential,\n", + " subscription_id=subscription_id,\n", + " api_version=\"2022-04-01\",\n", + ")\n", + "\n", + "uai_role_check_list = {\n", + " \"Cognitive Services Contributor\": {\n", + " \"step\": \"4.2.2\",\n", + " \"description\": \"assigne the role Cognitive Services User to the UAI on the Azure AI Content Safety resource.\",\n", + " },\n", + " \"AcrPull\": {\n", + " \"step\": \"4.2.3\",\n", + " \"description\": \"assigne the role AcrPull to the UAI on
the Azure Container Registry.\",\n", + " },\n", + " \"Storage Blob Data Reader\": {\n", + " \"step\": \"4.2.4\",\n", + " \"description\": \"assigne the role Storage Blob Data Reader to the UAI on the Azure Storage account.\",\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### 2.4.2 Define util function to assign access for UAI\n", + "The function below assigns the Cognitive Services Contributor role, which the UAI needs in order to access the Azure AI Content Safety resource." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "role_name = \"Cognitive Services Contributor\" # minimum role required for listing AACS access keys\n", + "\n", + "\n", + "def assign_access_to_acs(scope):\n", + " role_defs = role_definition_client.role_definitions.list(scope=scope)\n", + " role_def = next((r for r in role_defs if r.role_name == role_name))\n", + "\n", + " from azure.core.exceptions import ResourceExistsError\n", + "\n", + " try:\n", + " role_assignment_client.role_assignments.create(\n", + " scope=scope,\n", + " role_assignment_name=str(uuid.uuid4()),\n", + " parameters=RoleAssignmentCreateParameters(\n", + " role_definition_id=role_def.id,\n", + " principal_id=uai_principal_id,\n", + " principal_type=\"ServicePrincipal\",\n", + " ),\n", + " )\n", + " except ResourceExistsError as ex:\n", + " pass\n", + " except Exception as ex:\n", + " print(ex)\n", + " raise ex\n", + "\n", + " if role_name in uai_role_check_list:\n", + " del uai_role_check_list[role_name]\n", + " print(\n", + " f\"Role assignment for {role_name} at the Azure AI Content Safety resource level completed.\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### 2.4.3 Grant the user identity access to the Azure Content Safety resource" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azure.mgmt.cognitiveservices import
CognitiveServicesManagementClient\n", + "\n", + "acs_client = CognitiveServicesManagementClient(credential, subscription_id)\n", + "\n", + "\n", + "def find_acs(accounts):\n", + " return next(x for x in accounts if x.kind == \"ContentSafety\")\n", + "\n", + "\n", + "if aacs_name == \"\":\n", + " for acs_resource in acs_client.accounts.list_by_resource_group(resource_group):\n", + " assign_access_to_acs(acs_resource.id)\n", + "else:\n", + " acs_resource = acs_client.accounts.get(resource_group, aacs_name)\n", + " assign_access_to_acs(acs_resource.id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### 2.4.4 Assign AcrPull at the workspace container registry scope\n", + "Since we will create the Azure AI Content Safety enabled LlaMA endpoint with User Assigned Identity, the user managed identity must have Storage Blob Data Reader permission on the storage account for the workspace, and AcrPull permission on the Azure Container Registry (ACR) for the workspace. Make sure your User Assigned Identity has the right permission." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "container_registry = workspace_resource.container_registry\n", + "\n", + "role_name = \"AcrPull\"\n", + "\n", + "role_defs = role_definition_client.role_definitions.list(scope=container_registry)\n", + "role_def = next((r for r in role_defs if r.role_name == role_name))\n", + "\n", + "from azure.core.exceptions import ResourceExistsError\n", + "\n", + "try:\n", + " role_assignment_client.role_assignments.create(\n", + " scope=container_registry,\n", + " role_assignment_name=str(uuid.uuid4()),\n", + " parameters=RoleAssignmentCreateParameters(\n", + " role_definition_id=role_def.id,\n", + " principal_id=uai_principal_id,\n", + " principal_type=\"ServicePrincipal\",\n", + " ),\n", + " )\n", + "except ResourceExistsError as ex:\n", + " pass\n", + "except Exception as ex:\n", + " print(ex)\n", + " raise ex\n", + "\n", + "if role_name in uai_role_check_list:\n", + " del uai_role_check_list[role_name]\n", + "print(\"Role assignment for AcrPull at the workspace container registry completed.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### 2.4.5 Assign Storage Blob Data Reader at the workspace storage account scope" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "role_name = \"Storage Blob Data Reader\"\n", + "blob_scope = workspace_resource.storage_account\n", + "\n", + "role_defs = role_definition_client.role_definitions.list(scope=blob_scope)\n", + "role_def = next((r for r in role_defs if r.role_name == role_name))\n", + "\n", + "from azure.core.exceptions import ResourceExistsError\n", + "\n", + "try:\n", + " role_assignment_client.role_assignments.create(\n", + " scope=blob_scope,\n", + " role_assignment_name=str(uuid.uuid4()),\n", + " parameters=RoleAssignmentCreateParameters(\n", + " role_definition_id=role_def.id,\n", + "
principal_id=uai_principal_id,\n", + " principal_type=\"ServicePrincipal\",\n", + " ),\n", + " )\n", + "except ResourceExistsError as ex:\n", + " pass\n", + "except Exception as ex:\n", + " print(ex)\n", + " raise ex\n", + "\n", + "if role_name in uai_role_check_list:\n", + " del uai_role_check_list[role_name]\n", + "print(\n", + " \"Role assignment for `Storage Blob Data Reader` at the workspace storage account completed.\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2.4.6 Let's make sure we didn't miss anything in the previous steps. Please execute the following script to check:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check everything is properly done before creating the Azure AI Content Safety Enabled Llama online endpoint\n", + "missing_steps = []\n", + "print(\n", + " \"You selected UAI to deploy the Azure AI Content Safety Enabled Llama online endpoint, checking if the UAI has the required roles assigned...\"\n", + ")\n", + "if uai_role_check_list:\n", + " for key, value in uai_role_check_list.items():\n", + " missing_steps.append(\n", + " f'Please go to step {value[\"step\"]} to {value[\"description\"]}'\n", + " )\n", + "\n", + "if missing_steps:\n", + " print(\"Seems you missed some step above.\")\n", + " steps = \"\\n\".join(missing_steps)\n", + " raise Exception(f\"Please complete the missing steps before proceeding:\\n{steps}\")\n", + "else:\n", + " print(\n", + " \"All steps are completed, proceeding to create the Azure AI Content Safety Enabled Llama online endpoint...\"\n", + " )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", +
"pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}