diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py
index ca63991af6..6e3be661bd 100644
--- a/newrelic/hooks/external_botocore.py
+++ b/newrelic/hooks/external_botocore.py
@@ -144,7 +144,7 @@ def create_chat_completion_message_event(
             "response.model": request_model,
             "vendor": "bedrock",
             "ingest_source": "Python",
-            "is_response": True
+            "is_response": True,
         }
         transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict)
 
@@ -246,7 +246,7 @@ def extract_bedrock_claude_model(request_body, response_body=None):
     chat_completion_summary_dict = {
         "request.max_tokens": request_body.get("max_tokens_to_sample", ""),
         "request.temperature": request_body.get("temperature", ""),
-        "response.number_of_messages": len(input_message_list)
+        "response.number_of_messages": len(input_message_list),
     }
 
     if response_body:
@@ -264,6 +264,54 @@ def extract_bedrock_claude_model(request_body, response_body=None):
     return input_message_list, output_message_list, chat_completion_summary_dict
 
 
+def extract_bedrock_llama_model(request_body, response_body=None):
+    """Extract chat completion data from a Bedrock Meta Llama 2 invocation.
+
+    :param request_body: JSON-encoded request; ``prompt``, ``max_gen_len`` and
+        ``temperature`` are read from it.
+    :param response_body: JSON-encoded response, or a falsy value when there is
+        none; ``generation``, ``prompt_token_count``, ``generation_token_count``
+        and ``stop_reason`` are read from it.
+    :return: ``(input_message_list, output_message_list, chat_completion_summary_dict)``.
+    """
+    request_body = json.loads(request_body)
+    if response_body:
+        response_body = json.loads(response_body)
+
+    input_message_list = [{"role": "user", "content": request_body.get("prompt", "")}]
+
+    chat_completion_summary_dict = {
+        "request.max_tokens": request_body.get("max_gen_len", ""),
+        "request.temperature": request_body.get("temperature", ""),
+        "response.number_of_messages": len(input_message_list),
+    }
+
+    if response_body:
+        output_message_list = [{"role": "assistant", "content": response_body.get("generation", "")}]
+        prompt_tokens = response_body.get("prompt_token_count", None)
+        completion_tokens = response_body.get("generation_token_count", None)
+        # Test against None, not truthiness: a reported count of 0 is still a
+        # known value and must yield a total rather than None.
+        if prompt_tokens is not None and completion_tokens is not None:
+            total_tokens = prompt_tokens + completion_tokens
+        else:
+            total_tokens = None
+
+        chat_completion_summary_dict.update(
+            {
+                "response.usage.completion_tokens": completion_tokens,
+                "response.usage.prompt_tokens": prompt_tokens,
+                "response.usage.total_tokens": total_tokens,
+                "response.choices.finish_reason": response_body.get("stop_reason", ""),
+                "response.number_of_messages": len(input_message_list) + len(output_message_list),
+            }
+        )
+    else:
+        output_message_list = []
+
+    return input_message_list, output_message_list, chat_completion_summary_dict
+
+
 def extract_bedrock_cohere_model(request_body, response_body=None):
     request_body = json.loads(request_body)
     if response_body:
@@ -274,7 +322,7 @@ def extract_bedrock_cohere_model(request_body, response_body=None):
     chat_completion_summary_dict = {
         "request.max_tokens": request_body.get("max_tokens", ""),
         "request.temperature": request_body.get("temperature", ""),
-        "response.number_of_messages": len(input_message_list)
+        "response.number_of_messages": len(input_message_list),
     }
 
     if response_body:
@@ -300,6 +348,7 @@ def extract_bedrock_cohere_model(request_body, response_body=None):
     ("ai21.j2", extract_bedrock_ai21_j2_model),
     ("cohere", extract_bedrock_cohere_model),
     ("anthropic.claude", extract_bedrock_claude_model),
+    ("meta.llama2", extract_bedrock_llama_model),
 ]
 
 
@@ -368,7 +417,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
                 notice_error_attributes = {
                     "http.statusCode": error_attributes["http.statusCode"],
                     "error.message": error_attributes["error.message"],
-                    "error.code": error_attributes["error.code"]
+                    "error.code": error_attributes["error.code"],
                 }
 
                 if is_embedding:
@@ -392,7 +441,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
                         ft.duration,
                         True,
                         trace_id,
-                        span_id
+                        span_id,
                     )
                 else:
                     handle_chat_completion_event(
@@ -406,7 +455,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
                         ft.duration,
                         True,
                         trace_id,
-                        span_id
+                        span_id,
                     )
 
             finally:
@@ -463,7 +512,7 @@ def handle_embedding_event(
     duration,
     is_error,
     trace_id,
-    span_id
+    span_id,
 ):
     embedding_id = str(uuid.uuid4())
 
@@ -508,7 +557,7 @@ def handle_chat_completion_event(
     duration,
     is_error,
     trace_id,
-    span_id
+    span_id,
 ):
     custom_attrs_dict = transaction._custom_params
     conversation_id = custom_attrs_dict.get("llm.conversation_id", "")
diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index 8534502289..94b0b954c5 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -864,7 +864,7 @@ def wrap_base_client_process_response(wrapped, instance, args, kwargs):
         nr_response_headers = getattr(response, "headers")
 
     return_val = wrapped(*args, **kwargs)
-    # Obtain reponse headers for v1
+    # Obtain response headers for v1
     return_val._nr_response_headers = nr_response_headers
     return return_val
 
diff --git a/tests/external_botocore/_mock_external_bedrock_server.py b/tests/external_botocore/_mock_external_bedrock_server.py
index da5ff68dd9..609e7afa93 100644
--- a/tests/external_botocore/_mock_external_bedrock_server.py
+++ b/tests/external_botocore/_mock_external_bedrock_server.py
@@ -3332,6 +3332,16 @@
             "prompt": "What is 212 degrees Fahrenheit converted to Celsius?",
         },
     ],
+    "meta.llama2-13b-chat-v1::What is 212 degrees Fahrenheit converted to Celsius?": [
+        {"Content-Type": "application/json", "x-amzn-RequestId": "9a64cdb0-3e82-41c7-873a-c12a77e0143a"},
+        200,
+        {
+            "generation": " Here's the answer:\n\n212°F = 100°C\n\nSo, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
+            "prompt_token_count": 17,
+            "generation_token_count": 46,
+            "stop_reason": "stop",
+        },
+    ],
     "does-not-exist::": [
         {
             "Content-Type": "application/json",
@@ -3395,6 +3405,15 @@
         403,
         {"message": "The security token included in the request is invalid."},
     ],
+    "meta.llama2-13b-chat-v1::Invalid Token": [
+        {
+            "Content-Type": "application/json",
+            "x-amzn-RequestId": "22476490-a0d6-42db-b5ea-32d0b8a7f751",
+            "x-amzn-ErrorType": "UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/",
+        },
+        403,
+        {"message": "The security token included in the request is invalid."},
+    ],
 }
 
 MODEL_PATH_RE = re.compile(r"/model/([^/]+)/invoke")
@@ -3454,7 +3473,7 @@ def __init__(self, handler=simple_get, port=None, *args, **kwargs):
 if __name__ == "__main__":
     # Use this to sort dict for easier future incremental updates
     print("RESPONSES = %s" % dict(sorted(RESPONSES.items(), key=lambda i: (i[1][1], i[0]))))
-    
+
     with MockExternalBedrockServer() as server:
         print("MockExternalBedrockServer serving on port %s" % str(server.port))
         while True:
diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py b/tests/external_botocore/_test_bedrock_chat_completion.py
index e3f53fd31f..f1d21c73c7 100644
--- a/tests/external_botocore/_test_bedrock_chat_completion.py
+++ b/tests/external_botocore/_test_bedrock_chat_completion.py
@@ -3,6 +3,7 @@
     "ai21.j2-mid-v1": '{"prompt": "%s", "temperature": %f, "maxTokens": %d}',
     "anthropic.claude-instant-v1": '{"prompt": "Human: %s Assistant:", "temperature": %f, "max_tokens_to_sample": %d}',
     "cohere.command-text-v14": '{"prompt": "%s", "temperature": %f, "max_tokens": %d}',
+    "meta.llama2-13b-chat-v1": '{"prompt": "%s", "temperature": %f, "max_gen_len": %d}',
 }
 
 chat_completion_expected_events = {
@@ -263,6 +264,72 @@
             },
         ),
     ],
+    "meta.llama2-13b-chat-v1": [
+        (
+            {"type": "LlmChatCompletionSummary"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "transaction_id": "transaction-id",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
+                "api_key_last_four_digits": "CRET",
+                "duration": None,  # Response time varies each test run
+                "request.model": "meta.llama2-13b-chat-v1",
+                "response.model": "meta.llama2-13b-chat-v1",
+                "response.usage.prompt_tokens": 17,
+                "response.usage.completion_tokens": 46,
+                "response.usage.total_tokens": 63,
+                "request.temperature": 0.7,
+                "request.max_tokens": 100,
+                "response.choices.finish_reason": "stop",
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+                "response.number_of_messages": 2,
+            },
+        ),
+        (
+            {"type": "LlmChatCompletionMessage"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "transaction_id": "transaction-id",
+                "content": "What is 212 degrees Fahrenheit converted to Celsius?",
+                "role": "user",
+                "completion_id": None,
+                "sequence": 0,
+                "response.model": "meta.llama2-13b-chat-v1",
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+            },
+        ),
+        (
+            {"type": "LlmChatCompletionMessage"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "transaction_id": "transaction-id",
+                "content": " Here's the answer:\n\n212°F = 100°C\n\nSo, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
+                "role": "assistant",
+                "completion_id": None,
+                "sequence": 1,
+                "response.model": "meta.llama2-13b-chat-v1",
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+                "is_response": True,
+            },
+        ),
+    ],
 }
 
 chat_completion_invalid_model_error_events = [
@@ -480,6 +547,49 @@
             },
         ),
     ],
+    "meta.llama2-13b-chat-v1": [
+        (
+            {"type": "LlmChatCompletionSummary"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "transaction_id": "transaction-id",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "request_id": "",
+                "api_key_last_four_digits": "-KEY",
+                "duration": None,  # Response time varies each test run
+                "request.model": "meta.llama2-13b-chat-v1",
+                "response.model": "meta.llama2-13b-chat-v1",
+                "request.temperature": 0.7,
+                "request.max_tokens": 100,
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+                "response.number_of_messages": 1,
+                "error": True,
+            },
+        ),
+        (
+            {"type": "LlmChatCompletionMessage"},
+            {
+                "id": None,  # UUID that varies with each run
+                "appName": "Python Agent Test (external_botocore)",
+                "conversation_id": "my-awesome-id",
+                "request_id": "",
+                "span_id": None,
+                "trace_id": "trace-id",
+                "transaction_id": "transaction-id",
+                "content": "Invalid Token",
+                "role": "user",
+                "completion_id": None,
+                "sequence": 0,
+                "response.model": "meta.llama2-13b-chat-v1",
+                "vendor": "bedrock",
+                "ingest_source": "Python",
+            },
+        ),
+    ],
 }
 
 chat_completion_expected_client_errors = {
@@ -503,4 +613,9 @@
         "error.message": "The security token included in the request is invalid.",
         "error.code": "UnrecognizedClientException",
     },
+    "meta.llama2-13b-chat-v1": {
+        "http.statusCode": 403,
+        "error.message": "The security token included in the request is invalid.",
+        "error.code": "UnrecognizedClientException",
+    },
 }
diff --git a/tests/external_botocore/test_bedrock_chat_completion.py b/tests/external_botocore/test_bedrock_chat_completion.py
index 2c4925a43b..c5c2a4706f 100644
--- a/tests/external_botocore/test_bedrock_chat_completion.py
+++ b/tests/external_botocore/test_bedrock_chat_completion.py
@@ -56,6 +56,7 @@ def is_file_payload(request):
         "ai21.j2-mid-v1",
         "anthropic.claude-instant-v1",
         "cohere.command-text-v14",
+        "meta.llama2-13b-chat-v1",
     ],
 )
 def model_id(request):