diff --git a/newrelic/config.py b/newrelic/config.py index 6d46522229..da108e3d0e 100644 --- a/newrelic/config.py +++ b/newrelic/config.py @@ -2062,6 +2062,12 @@ def _process_module_builtin_defaults(): "newrelic.hooks.mlmodel_openai", "instrument_openai_util", ) + _process_module_definition( + "openai.api_resources.abstract.engine_api_resource", + "newrelic.hooks.mlmodel_openai", + "instrument_openai_api_resources_abstract_engine_api_resource", + ) + _process_module_definition( "openai.resources.chat.completions", "newrelic.hooks.mlmodel_openai", diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index 0741aaaeaa..65de5cdf39 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys import uuid import openai @@ -19,8 +20,9 @@ from newrelic.api.function_trace import FunctionTrace from newrelic.api.time_trace import get_trace_linking_metadata from newrelic.api.transaction import current_transaction -from newrelic.common.object_wrapper import wrap_function_wrapper +from newrelic.common.object_wrapper import ObjectProxy, wrap_function_wrapper from newrelic.common.package_version_utils import get_package_version +from newrelic.common.signature import bind_args from newrelic.core.config import global_settings OPENAI_VERSION = get_package_version("openai") @@ -203,91 +205,107 @@ def wrap_chat_completion_sync(wrapped, instance, args, kwargs): function_name = wrapped.__name__ - with FunctionTrace(name=function_name, group="Llm/completion/OpenAI") as ft: - # Get trace information - available_metadata = get_trace_linking_metadata() - span_id = available_metadata.get("span.id", "") - trace_id = available_metadata.get("trace.id", "") - - try: - return_val = wrapped(*args, **kwargs) - if kwargs.get("stream", False): - return return_val - except Exception as exc: - if kwargs.get("stream", False): - raise - if OPENAI_V1: - response = getattr(exc, "response", "") - response_headers = getattr(response, "headers", "") - exc_organization = response_headers.get("openai-organization", "") if response_headers else "" - # There appears to be a bug here in openai v1 where despite having code, - # param, etc in the error response, they are not populated on the exception - # object so grab them from the response body object instead. - body = getattr(exc, "body", {}) or {} - notice_error_attributes = { - "http.statusCode": getattr(exc, "status_code", "") or "", - "error.message": body.get("message", "") or "", - "error.code": body.get("code", "") or "", - "error.param": body.get("param", "") or "", - "completion_id": completion_id, - } - else: - exc_organization = getattr(exc, "organization", "") - notice_error_attributes = { - "http.statusCode": getattr(exc, "http_status", ""), - "error.message": getattr(exc, "_message", ""), - "error.code": getattr(getattr(exc, "error", ""), "code", ""), - "error.param": getattr(exc, "param", ""), - "completion_id": completion_id, - } - # Override the default message if it is not empty. - message = notice_error_attributes.pop("error.message") - if message: - exc._nr_message = message - - ft.notice_error( - attributes=notice_error_attributes, - ) - # Gather attributes to add to chat completion summary event in error context - error_chat_completion_dict = { - "id": completion_id, - "appName": app_name, - "conversation_id": conversation_id, - "api_key_last_four_digits": api_key_last_four_digits, - "span_id": span_id, - "trace_id": trace_id, - "transaction_id": transaction.guid, - "response.number_of_messages": len(request_message_list), - "request.model": kwargs.get("model") or kwargs.get("engine") or "", - "request.temperature": kwargs.get("temperature", ""), - "request.max_tokens": kwargs.get("max_tokens", ""), - "vendor": "openAI", - "ingest_source": "Python", - "response.organization": "" if exc_organization is None else exc_organization, - "duration": ft.duration, - "error": True, + ft = FunctionTrace(name=function_name, group="Llm/completion/OpenAI") + ft.__enter__() + # Get trace information + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") + try: + return_val = wrapped(*args, **kwargs) + return_val._nr_ft = ft + except Exception as exc: + if OPENAI_V1: + response = getattr(exc, "response", "") + response_headers = getattr(response, "headers", "") + exc_organization = response_headers.get("openai-organization", "") if response_headers else "" + # There appears to be a bug here in openai v1 where despite having code, + # param, etc in the error response, they are not populated on the exception + # object so grab them from the response body object instead. + body = getattr(exc, "body", {}) or {} + notice_error_attributes = { + "http.statusCode": getattr(exc, "status_code", "") or "", + "error.message": body.get("message", "") or "", + "error.code": body.get("code", "") or "", + "error.param": body.get("param", "") or "", + "completion_id": completion_id, } - transaction.record_custom_event("LlmChatCompletionSummary", error_chat_completion_dict) - - create_chat_completion_message_event( - transaction, - app_name, - request_message_list, - completion_id, - span_id, - trace_id, - "", - None, - "", - conversation_id, - None, - ) - - raise + else: + exc_organization = getattr(exc, "organization", "") + notice_error_attributes = { + "http.statusCode": getattr(exc, "http_status", ""), + "error.message": getattr(exc, "_message", ""), + "error.code": getattr(getattr(exc, "error", ""), "code", ""), + "error.param": getattr(exc, "param", ""), + "completion_id": completion_id, + } + # Override the default message if it is not empty. + message = notice_error_attributes.pop("error.message") + if message: + exc._nr_message = message + + ft.notice_error( + attributes=notice_error_attributes, + ) + # Gather attributes to add to embedding summary event in error context + error_chat_completion_dict = { + "id": completion_id, + "appName": app_name, + "conversation_id": conversation_id, + "api_key_last_four_digits": api_key_last_four_digits, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "response.number_of_messages": len(request_message_list), + "request.model": kwargs.get("model") or kwargs.get("engine") or "", + "request.temperature": kwargs.get("temperature", ""), + "request.max_tokens": kwargs.get("max_tokens", ""), + "vendor": "openAI", + "ingest_source": "Python", + "response.organization": "" if exc_organization is None else exc_organization, + "duration": ft.duration, + "error": True, + } + transaction.record_custom_event("LlmChatCompletionSummary", error_chat_completion_dict) + + create_chat_completion_message_event( + transaction, + app_name, + request_message_list, + completion_id, + span_id, + trace_id, + "", + None, + "", + conversation_id, + None, + ) + + ft.__exit__(*sys.exc_info()) + raise + + stream = kwargs.get("stream", False) + # If response is not a stream generator, we exit the function trace now. + if not stream: + ft.__exit__(None, None, None) if not return_val: return return_val + if stream: + # The function trace will be exited when in the final iteration of the response + # generator. + setattr(return_val, "_nr_ft", ft) + setattr(return_val, "_nr_openai_attrs", getattr(return_val, "_nr_openai_attrs", {})) + return_val._nr_openai_attrs["messages"] = kwargs.get("messages", []) + return_val._nr_openai_attrs["temperature"] = kwargs.get("temperature", "") + return_val._nr_openai_attrs["max_tokens"] = kwargs.get("max_tokens", "") + return_val._nr_openai_attrs["request.model"] = kwargs.get("model") or kwargs.get("engine") or "" + return_val._nr_openai_attrs["api_key_last_four_digits"] = api_key_last_four_digits + return return_val + + # If response is not a stream generator, record the event data. # At this point, we have a response so we can grab attributes only available on the response object response_headers = getattr(return_val, "_nr_response_headers", {}) # In v1, response objects are pydantic models so this function call converts the @@ -640,7 +658,7 @@ async def wrap_embedding_async(wrapped, instance, args, kwargs): async def wrap_chat_completion_async(wrapped, instance, args, kwargs): transaction = current_transaction() - if not transaction or kwargs.get("stream", False): + if not transaction: return await wrapped(*args, **kwargs) # Framework metric also used for entity tagging in the UI @@ -665,88 +683,107 @@ async def wrap_chat_completion_async(wrapped, instance, args, kwargs): completion_id = str(uuid.uuid4()) function_name = wrapped.__name__ - - with FunctionTrace(name=function_name, group="Llm/completion/OpenAI") as ft: - # Get trace information - available_metadata = get_trace_linking_metadata() - span_id = available_metadata.get("span.id", "") - trace_id = available_metadata.get("trace.id", "") - - try: - return_val = await wrapped(*args, **kwargs) - except Exception as exc: - if OPENAI_V1: - response = getattr(exc, "response", "") - response_headers = getattr(response, "headers", "") - exc_organization = response_headers.get("openai-organization", "") if response_headers else "" - # There appears to be a bug here in openai v1 where despite having code, - # param, etc in the error response, they are not populated on the exception - # object so grab them from the response body object instead. - body = getattr(exc, "body", {}) or {} - notice_error_attributes = { - "http.statusCode": getattr(exc, "status_code", "") or "", - "error.message": body.get("message", "") or "", - "error.code": body.get("code", "") or "", - "error.param": body.get("param", "") or "", - "completion_id": completion_id, - } - else: - exc_organization = getattr(exc, "organization", "") - notice_error_attributes = { - "http.statusCode": getattr(exc, "http_status", ""), - "error.message": getattr(exc, "_message", ""), - "error.code": getattr(getattr(exc, "error", ""), "code", ""), - "error.param": getattr(exc, "param", ""), - "completion_id": completion_id, - } - # Override the default message if it is not empty. - message = notice_error_attributes.pop("error.message") - if message: - exc._nr_message = message - - ft.notice_error( - attributes=notice_error_attributes, - ) - # Gather attributes to add to chat completion summary event in error context - error_chat_completion_dict = { - "id": completion_id, - "appName": app_name, - "conversation_id": conversation_id, - "api_key_last_four_digits": api_key_last_four_digits, - "span_id": span_id, - "trace_id": trace_id, - "transaction_id": transaction.guid, - "response.number_of_messages": len(request_message_list), - "request.model": kwargs.get("model") or kwargs.get("engine") or "", - "request.temperature": kwargs.get("temperature", ""), - "request.max_tokens": kwargs.get("max_tokens", ""), - "vendor": "openAI", - "ingest_source": "Python", - "response.organization": "" if exc_organization is None else exc_organization, - "duration": ft.duration, - "error": True, + ft = FunctionTrace(name=function_name, group="Llm/completion/OpenAI") + ft.__enter__() + # Get trace information + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") + try: + return_val = await wrapped(*args, **kwargs) + return_val._nr_ft = ft + except Exception as exc: + if OPENAI_V1: + response = getattr(exc, "response", "") + response_headers = getattr(response, "headers", "") + exc_organization = response_headers.get("openai-organization", "") if response_headers else "" + # There appears to be a bug here in openai v1 where despite having code, + # param, etc in the error response, they are not populated on the exception + # object so grab them from the response body object instead. + body = getattr(exc, "body", {}) or {} + notice_error_attributes = { + "http.statusCode": getattr(exc, "status_code", "") or "", + "error.message": body.get("message", "") or "", + "error.code": body.get("code", "") or "", + "error.param": body.get("param", "") or "", + "completion_id": completion_id, } - transaction.record_custom_event("LlmChatCompletionSummary", error_chat_completion_dict) - - create_chat_completion_message_event( - transaction, - app_name, - request_message_list, - completion_id, - span_id, - trace_id, - "", - None, - "", - conversation_id, - None, - ) - - raise + else: + exc_organization = getattr(exc, "organization", "") + notice_error_attributes = { + "http.statusCode": getattr(exc, "http_status", ""), + "error.message": getattr(exc, "_message", ""), + "error.code": getattr(getattr(exc, "error", ""), "code", ""), + "error.param": getattr(exc, "param", ""), + "completion_id": completion_id, + } + # Override the default message if it is not empty. + message = notice_error_attributes.pop("error.message") + if message: + exc._nr_message = message + + ft.notice_error( + attributes=notice_error_attributes, + ) + # Gather attributes to add to embedding summary event in error context + error_chat_completion_dict = { + "id": completion_id, + "appName": app_name, + "conversation_id": conversation_id, + "api_key_last_four_digits": api_key_last_four_digits, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "response.number_of_messages": len(request_message_list), + "request.model": kwargs.get("model") or kwargs.get("engine") or "", + "request.temperature": kwargs.get("temperature", ""), + "request.max_tokens": kwargs.get("max_tokens", ""), + "vendor": "openAI", + "ingest_source": "Python", + "response.organization": "" if exc_organization is None else exc_organization, + "duration": ft.duration, + "error": True, + } + transaction.record_custom_event("LlmChatCompletionSummary", error_chat_completion_dict) + + create_chat_completion_message_event( + transaction, + app_name, + request_message_list, + completion_id, + span_id, + trace_id, + "", + None, + "", + conversation_id, + None, + ) + + ft.__exit__(*sys.exc_info()) + raise + + stream = kwargs.get("stream", False) + # If response is not a stream generator, we exit the function trace now. + if not stream: + ft.__exit__(None, None, None) if not return_val: return return_val + if stream: + # The function trace will be exited when in the final iteration of the response + # generator. + setattr(return_val, "_nr_ft", ft) + setattr(return_val, "_nr_openai_attrs", getattr(return_val, "_nr_openai_attrs", {})) + return_val._nr_openai_attrs["messages"] = kwargs.get("messages", []) + return_val._nr_openai_attrs["temperature"] = kwargs.get("temperature", "") + return_val._nr_openai_attrs["max_tokens"] = kwargs.get("max_tokens", "") + return_val._nr_openai_attrs["request.model"] = kwargs.get("model") or kwargs.get("engine") or "" + return_val._nr_openai_attrs["api_key_last_four_digits"] = api_key_last_four_digits + return return_val + + # If response is not a stream generator, record the event data. # At this point, we have a response so we can grab attributes only available on the response object response_headers = getattr(return_val, "_nr_response_headers", None) # In v1, response objects are pydantic models so this function call converts the @@ -851,7 +888,9 @@ def wrap_convert_to_openai_object(wrapped, instance, args, kwargs): resp = args[0] returned_response = wrapped(*args, **kwargs) - if isinstance(resp, openai.openai_response.OpenAIResponse): + if isinstance(returned_response, openai.openai_object.OpenAIObject) and isinstance( + resp, openai.openai_response.OpenAIResponse + ): setattr(returned_response, "_nr_response_headers", getattr(resp, "_headers", {})) return returned_response @@ -895,6 +934,317 @@ def instrument_openai_util(module): setattr(module.convert_to_openai_object, "_nr_wrapped", True) +class GeneratorProxy(ObjectProxy): + def __init__(self, wrapped): + super(GeneratorProxy, self).__init__(wrapped) + + def __iter__(self): + return self + + def __next__(self): + transaction = current_transaction() + if not transaction: + return self.__wrapped__.__next__() + + return_val = None + try: + return_val = self.__wrapped__.__next__() + record_stream_chunk(self, return_val) + except StopIteration as e: + record_events_on_stop_iteration(self, transaction) + raise + except Exception as exc: + record_error(self, transaction, exc) + raise + return return_val + + def close(self): + return super(GeneratorProxy, self).close() + + +def record_stream_chunk(self, return_val): + if return_val: + choices = return_val.get("choices", []) + self._nr_openai_attrs["response.model"] = return_val.get("model", "") + self._nr_openai_attrs["id"] = return_val.get("id", "") + self._nr_openai_attrs["response.organization"] = return_val.get("organization", "") + if choices: + delta = choices[0].get("delta", {}) + if delta: + self._nr_openai_attrs["content"] = self._nr_openai_attrs.get("content", "") + delta.get("content", "") + self._nr_openai_attrs["role"] = self._nr_openai_attrs.get("role", None) or delta.get("role") + self._nr_openai_attrs["finish_reason"] = choices[0].get("finish_reason", "") + self._nr_openai_attrs["response_headers"] = getattr(return_val, "_nr_response_headers", {}) + + +def record_events_on_stop_iteration(self, transaction): + if hasattr(self, "_nr_ft"): + openai_attrs = getattr(self, "_nr_openai_attrs", {}) + self._nr_ft.__exit__(None, None, None) + + # If there are no openai attrs exit early as there's no data to record. + if not openai_attrs: + raise + + message_ids = record_streaming_chat_completion_events(self, transaction, openai_attrs) + # Cache message ids on transaction for retrieval after open ai call completion. + if not hasattr(transaction, "_nr_message_ids"): + transaction._nr_message_ids = {} + response_id = openai_attrs.get("response_id", None) + transaction._nr_message_ids[response_id] = message_ids + + +def record_error(self, transaction, exc): + if hasattr(self, "_nr_ft"): + openai_attrs = getattr(self, "_nr_openai_attrs", {}) + + # If there are no openai attrs exit early as there's no data to record. + if not openai_attrs: + self._nr_ft.__exit__(*sys.exc_info()) + raise + + record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc) + + +def record_streaming_chat_completion_events_error(self, transaction, openai_attrs, exc): + chat_completion_id = str(uuid.uuid4()) + if OPENAI_V1: + response = getattr(exc, "response", "") + response_headers = getattr(response, "headers", "") + organization = response_headers.get("openai-organization", "") if response_headers else "" + # There appears to be a bug here in openai v1 where despite having code, + # param, etc in the error response, they are not populated on the exception + # object so grab them from the response body object instead. + body = getattr(exc, "body", {}) or {} + notice_error_attributes = { + "http.statusCode": getattr(exc, "status_code", "") or "", + "error.message": body.get("message", "") or "", + "error.code": body.get("code", "") or "", + "error.param": body.get("param", "") or "", + "completion_id": chat_completion_id, + } + else: + organization = getattr(exc, "organization", "") + notice_error_attributes = { + "http.statusCode": getattr(exc, "http_status", ""), + "error.message": getattr(exc, "_message", ""), + "error.code": getattr(getattr(exc, "error", ""), "code", ""), + "error.param": getattr(exc, "param", ""), + "completion_id": chat_completion_id, + } + message = notice_error_attributes.pop("error.message") + if message: + exc._nr_message = message + self._nr_ft.notice_error( + attributes=notice_error_attributes, + ) + self._nr_ft.__exit__(*sys.exc_info()) + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") + + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") + + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") + + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") + + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + + messages = openai_attrs.get("messages", []) + + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + # Usage tokens are not supported in streaming for now. + "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + "response.organization": organization, + "error": True, + } + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] + + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) + + +def record_streaming_chat_completion_events(self, transaction, openai_attrs): + content = openai_attrs.get("content", None) + role = openai_attrs.get("role") + + custom_attrs_dict = transaction._custom_params + conversation_id = custom_attrs_dict.get("llm.conversation_id", "") + + chat_completion_id = str(uuid.uuid4()) + available_metadata = get_trace_linking_metadata() + span_id = available_metadata.get("span.id", "") + trace_id = available_metadata.get("trace.id", "") + + response_headers = openai_attrs.get("response_headers", {}) + settings = transaction.settings if transaction.settings is not None else global_settings() + response_id = openai_attrs.get("id", None) + request_id = response_headers.get("x-request-id", "") + organization = response_headers.get("openai-organization", "") + + api_key_last_four_digits = openai_attrs.get("api_key_last_four_digits", "") + + messages = openai_attrs.get("messages", []) + + chat_completion_summary_dict = { + "id": chat_completion_id, + "appName": settings.app_name, + "conversation_id": conversation_id, + "span_id": span_id, + "trace_id": trace_id, + "transaction_id": transaction.guid, + "request_id": request_id, + "api_key_last_four_digits": api_key_last_four_digits, + "duration": self._nr_ft.duration, + "request.model": openai_attrs.get("request.model", ""), + "response.model": openai_attrs.get("response.model", ""), + "response.organization": organization, + # Usage tokens are not supported in streaming for now. + "request.temperature": openai_attrs.get("temperature", ""), + "request.max_tokens": openai_attrs.get("max_tokens", ""), + "response.choices.finish_reason": openai_attrs.get("finish_reason", ""), + "response.headers.llmVersion": response_headers.get("openai-version", ""), + "response.headers.ratelimitLimitRequests": check_rate_limit_header( + response_headers, "x-ratelimit-limit-requests", True + ), + "response.headers.ratelimitLimitTokens": check_rate_limit_header( + response_headers, "x-ratelimit-limit-tokens", True + ), + "response.headers.ratelimitResetTokens": check_rate_limit_header( + response_headers, "x-ratelimit-reset-tokens", False + ), + "response.headers.ratelimitResetRequests": check_rate_limit_header( + response_headers, "x-ratelimit-reset-requests", False + ), + "response.headers.ratelimitRemainingTokens": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-tokens", True + ), + "response.headers.ratelimitRemainingRequests": check_rate_limit_header( + response_headers, "x-ratelimit-remaining-requests", True + ), + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": len(messages) + (1 if content else 0), + } + + transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) + + output_message_list = [] + if content: + output_message_list = [{"content": content, "role": role}] + + return create_chat_completion_message_event( + transaction, + settings.app_name, + list(messages), + chat_completion_id, + span_id, + trace_id, + openai_attrs.get("response.model", ""), + response_id, + request_id, + conversation_id, + output_message_list, + ) + + +class AsyncGeneratorProxy(ObjectProxy): + def __init__(self, wrapped): + super(AsyncGeneratorProxy, self).__init__(wrapped) + + def __aiter__(self): + self._nr_wrapped_iter = self.__wrapped__.__aiter__() + return self + + async def __anext__(self): + transaction = current_transaction() + if not transaction: + return await self._nr_wrapped_iter.__anext__() + + return_val = None + try: + return_val = await self._nr_wrapped_iter.__anext__() + record_stream_chunk(self, return_val) + except StopAsyncIteration as e: + record_events_on_stop_iteration(self, transaction) + raise + except Exception as exc: + record_error(self, transaction, exc) + raise + return return_val + + async def aclose(self): + return await super(AsyncGeneratorProxy, self).aclose() + + +def wrap_engine_api_resource_create_sync(wrapped, instance, args, kwargs): + transaction = current_transaction() + + if not transaction: + return wrapped(*args, **kwargs) + + bound_args = bind_args(wrapped, args, kwargs) + stream = bound_args["params"].get("stream", False) + + return_val = wrapped(*args, **kwargs) + + if stream: + return GeneratorProxy(return_val) + else: + return return_val + + +async def wrap_engine_api_resource_create_async(wrapped, instance, args, kwargs): + transaction = current_transaction() + + if not transaction: + return await wrapped(*args, **kwargs) + + bound_args = bind_args(wrapped, args, kwargs) + stream = bound_args["params"].get("stream", False) + + return_val = await wrapped(*args, **kwargs) + + if stream: + return AsyncGeneratorProxy(return_val) + else: + return return_val + + def instrument_openai_api_resources_embedding(module): if hasattr(module, "Embedding"): if hasattr(module.Embedding, "create"): @@ -942,3 +1292,10 @@ def instrument_openai_base_client(module): wrap_function_wrapper(module, "SyncAPIClient._process_response", wrap_base_client_process_response_sync) if hasattr(module.AsyncAPIClient, "_process_response"): wrap_function_wrapper(module, "AsyncAPIClient._process_response", wrap_base_client_process_response_async) + + +def instrument_openai_api_resources_abstract_engine_api_resource(module): + if hasattr(module.EngineAPIResource, "create"): + wrap_function_wrapper(module, "EngineAPIResource.create", wrap_engine_api_resource_create_sync) + if hasattr(module.EngineAPIResource, "acreate"): + wrap_function_wrapper(module, "EngineAPIResource.acreate", wrap_engine_api_resource_create_async) diff --git a/tests/external_botocore/test_botocore_sqs.py b/tests/external_botocore/test_botocore_sqs.py index d391931a3e..67744d1339 100644 --- a/tests/external_botocore/test_botocore_sqs.py +++ b/tests/external_botocore/test_botocore_sqs.py @@ -30,7 +30,6 @@ BOTOCORE_VERSION = get_package_version_tuple("botocore") url = "sqs.us-east-1.amazonaws.com" - if BOTOCORE_VERSION < (1, 29, 0): url = "queue.amazonaws.com" diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py index edcfc47f35..17ce71adac 100644 --- a/tests/mlmodel_openai/_mock_external_openai_server.py +++ b/tests/mlmodel_openai/_mock_external_openai_server.py @@ -30,6 +30,325 @@ # created by an external call. # 3) This app runs on a separate thread meaning it won't block the test app. +STREAMED_RESPONSES = { + "Stream parsing error.": [ + { + "Content-Type": "text/event-stream", + "openai-model": "gpt-3.5-turbo-0613", + "openai-organization": "new-relic-nkmd8b", + "openai-processing-ms": "516", + "openai-version": "2020-10-01", + "x-ratelimit-limit-requests": "200", + "x-ratelimit-limit-tokens": "40000", + "x-ratelimit-remaining-requests": "199", + "x-ratelimit-remaining-tokens": "39940", + "x-ratelimit-reset-requests": "7m12s", + "x-ratelimit-reset-tokens": "90ms", + "x-request-id": "49dbbffbd3c3f4612aa48def69059ccd", + }, + 200, + [ + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [ + {"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": None, "finish_reason": None} + ], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": None, "finish_reason": None}], + }, + ], + ], + "Invalid API key.": [ + {"Content-Type": "application/json; charset=utf-8", "x-request-id": "4f8f61a7d0401e42a6760ea2ca2049f6"}, + 401, + { + "error": { + "message": "Incorrect API key provided: DEADBEEF. You can find your API key at https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", + "param": None, + "code": "invalid_api_key", + } + }, + ], + "Model does not exist.": [ + { + "Content-Type": "application/json; charset=utf-8", + "x-request-id": "cfdf51fb795362ae578c12a21796262c", + }, + 404, + { + "error": { + "message": "The model `does-not-exist` does not exist", + "type": "invalid_request_error", + "param": None, + "code": "model_not_found", + } + }, + ], + "You are a scientist.": [ + { + "Content-Type": "text/event-stream", + "openai-model": "gpt-3.5-turbo-0613", + "openai-organization": "new-relic-nkmd8b", + "openai-processing-ms": "516", + "openai-version": "2020-10-01", + "x-ratelimit-limit-requests": "200", + "x-ratelimit-limit-tokens": "40000", + "x-ratelimit-remaining-requests": "199", + "x-ratelimit-remaining-tokens": "39940", + "x-ratelimit-reset-requests": "7m12s", + "x-ratelimit-reset-tokens": "90ms", + "x-request-id": "49dbbffbd3c3f4612aa48def69059ccd", + }, + 200, + [ + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [ + {"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": None, "finish_reason": None} + ], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " degrees"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " Fahrenheit"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " is"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " equal"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " to"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " "}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "100"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " degrees"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " Celsius"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "."}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {}, "logprobs": None, "finish_reason": "stop"}], + }, + ], + ], +} + +RESPONSES_V1 = { + "You are a scientist.": [ + { + "Content-Type": "text/event-stream", + "openai-model": "gpt-3.5-turbo-0613", + "openai-organization": "foobar-jtbczk", + "openai-processing-ms": "516", + "openai-version": "2020-10-01", + "x-ratelimit-limit-requests": "200", + "x-ratelimit-limit-tokens": "40000", + "x-ratelimit-remaining-requests": "196", + "x-ratelimit-remaining-tokens": "39880", + "x-ratelimit-reset-requests": "23m5.129s", + "x-ratelimit-reset-tokens": "180ms", + "x-request-id": "5c53c9b80af57a1c9b38568f01dcde7f", + }, + 200, + [ + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [ + {"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": None, "finish_reason": None} + ], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " degrees"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " Fahrenheit"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " is"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " equal"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " to"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " "}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "100"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " degrees"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " Celsius"}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "."}, "logprobs": None, "finish_reason": None}], + }, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", + "object": "chat.completion.chunk", + "created": 1706565311, + "model": "gpt-3.5-turbo-0613", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {}, "logprobs": None, "finish_reason": "stop"}], + }, + ], + ] +} RESPONSES_V1 = { "You are a scientist.": [ { @@ -304,7 +623,7 @@ def simple_get(openai_version, extract_shortened_prompt): def _simple_get(self): content_len = int(self.headers.get("content-length")) content = json.loads(self.rfile.read(content_len).decode("utf-8")) - + stream = content.get("stream", False) prompt = extract_shortened_prompt(content) if not prompt: self.send_response(500) @@ -316,6 +635,8 @@ def _simple_get(self): if openai_version < (1, 0): mocked_responses = RESPONSES + if stream: + mocked_responses = STREAMED_RESPONSES else: mocked_responses = RESPONSES_V1 @@ -338,7 +659,16 @@ def _simple_get(self): self.end_headers() # Send response body - self.wfile.write(json.dumps(response).encode("utf-8")) + if stream and status_code < 400: + for resp in response: + data = json.dumps(resp).encode("utf-8") + if prompt == "Stream parsing error.": + # Force a parsing error by writing an invalid streamed response. + self.wfile.write(b"data: %s" % data) + else: + self.wfile.write(b"data: %s\n\n" % data) + else: + self.wfile.write(json.dumps(response).encode("utf-8")) return return _simple_get diff --git a/tests/mlmodel_openai/conftest.py b/tests/mlmodel_openai/conftest.py index 180bec9cc4..976ba7875a 100644 --- a/tests/mlmodel_openai/conftest.py +++ b/tests/mlmodel_openai/conftest.py @@ -32,7 +32,8 @@ ) from newrelic.api.transaction import current_transaction -from newrelic.common.object_wrapper import wrap_function_wrapper +from newrelic.common.object_wrapper import ObjectProxy, wrap_function_wrapper +from newrelic.common.signature import bind_args _default_settings = { "transaction_tracer.explain_threshold": 0.0, @@ -54,17 +55,18 @@ "test_chat_completion_v1.py", "test_chat_completion_error_v1.py", "test_embeddings_v1.py", - "test_get_llm_message_ids_v1.py", - "test_chat_completion_error_v1.py", "test_embeddings_error_v1.py", + "test_get_llm_message_ids_v1.py", ] else: collect_ignore = [ "test_embeddings.py", "test_embeddings_error.py", "test_chat_completion.py", - "test_get_llm_message_ids.py", "test_chat_completion_error.py", + "test_chat_completion_stream.py", + "test_chat_completion_stream_error.py", + "test_get_llm_message_ids.py", ] @@ -148,6 +150,7 @@ def openai_server( wrap_openai_api_requestor_request, wrap_openai_api_requestor_interpret_response, wrap_httpx_client_send, + wrap_engine_api_resource_create, ): """ This fixture will either create a mocked backend for testing purposes, or will @@ -164,6 +167,11 @@ def openai_server( wrap_function_wrapper( "openai.api_requestor", "APIRequestor._interpret_response", wrap_openai_api_requestor_interpret_response ) + wrap_function_wrapper( + "openai.api_resources.abstract.engine_api_resource", + "EngineAPIResource.create", + wrap_engine_api_resource_create, + ) yield # Run tests else: # Apply function wrappers to record data @@ -247,20 +255,22 @@ def _wrap_openai_api_requestor_request(wrapped, instance, args, kwargs): # Send request result = wrapped(*args, **kwargs) - # Clean up data - data = result[0].data - headers = result[0]._headers - headers = dict( - filter( - lambda k: k[0].lower() in RECORDED_HEADERS - or k[0].lower().startswith("openai") - or k[0].lower().startswith("x-ratelimit"), - headers.items(), + # Append response data to audit log + if not kwargs.get("stream", False): + # Clean up data + data = result[0].data + headers = result[0]._headers + headers = dict( + filter( + lambda k: k[0].lower() in RECORDED_HEADERS + or k[0].lower().startswith("openai") + or k[0].lower().startswith("x-ratelimit"), + headers.items(), + ) ) - ) - - # Log response - OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, 200, data # Append response data to audit log + OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, 200, data + else: + OPENAI_AUDIT_LOG_CONTENTS[prompt] = [None, 200, []] return result return _wrap_openai_api_requestor_request @@ -272,3 +282,70 @@ def bind_request_params(method, url, params=None, *args, **kwargs): def bind_request_interpret_response_params(result, stream): return result.content.decode("utf-8"), result.status_code, result.headers + + +@pytest.fixture(scope="session") +def generator_proxy(): + class GeneratorProxy(ObjectProxy): + def __init__(self, wrapped): + super(GeneratorProxy, self).__init__(wrapped) + + def __iter__(self): + return self + + # Make this Proxy a pass through to our instrumentation's proxy by passing along + # get attr and set attr calls to our instrumentation's proxy. + def __getattr__(self, attr): + return self.__wrapped__.__getattr__(attr) + + def __setattr__(self, attr, value): + return self.__wrapped__.__setattr__(attr, value) + + def __next__(self): + transaction = current_transaction() + if not transaction: + return self.__wrapped__.__next__() + + try: + return_val = self.__wrapped__.__next__() + if return_val: + prompt = [k for k in OPENAI_AUDIT_LOG_CONTENTS.keys()][-1] + headers = dict( + filter( + lambda k: k[0].lower() in RECORDED_HEADERS + or k[0].lower().startswith("openai") + or k[0].lower().startswith("x-ratelimit"), + return_val._nr_response_headers.items(), + ) + ) + OPENAI_AUDIT_LOG_CONTENTS[prompt][0] = headers + OPENAI_AUDIT_LOG_CONTENTS[prompt][2].append(return_val.to_dict_recursive()) + return return_val + except Exception as e: + raise + + def close(self): + return super(GeneratorProxy, self).close() + + return GeneratorProxy + + +@pytest.fixture(scope="session") +def wrap_engine_api_resource_create(generator_proxy): + def _wrap_engine_api_resource_create(wrapped, instance, args, kwargs): + transaction = current_transaction() + + if not transaction: + return wrapped(*args, **kwargs) + + bound_args = bind_args(wrapped, args, kwargs) + stream = bound_args["params"].get("stream", False) + + return_val = wrapped(*args, **kwargs) + + if stream: + return generator_proxy(return_val) + else: + return return_val + + return _wrap_engine_api_resource_create diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py index 5681dbb57e..532e5bdc9b 100644 --- a/tests/mlmodel_openai/test_chat_completion.py +++ b/tests/mlmodel_openai/test_chat_completion.py @@ -16,6 +16,7 @@ from testing_support.fixtures import ( override_application_settings, reset_core_stats_engine, + validate_attributes, validate_custom_event_count, validate_attributes, ) diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py new file mode 100644 index 0000000000..72bdaf9b36 --- /dev/null +++ b/tests/mlmodel_openai/test_chat_completion_stream.py @@ -0,0 +1,366 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import openai +from testing_support.fixtures import ( + reset_core_stats_engine, + validate_attributes, + validate_custom_event_count, +) +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_transaction_metrics import ( + validate_transaction_metrics, +) + +from newrelic.api.background_task import background_task +from newrelic.api.transaction import add_custom_attribute + +disabled_custom_insights_settings = {"custom_insights_events.enabled": False} + +_test_openai_chat_completion_messages = ( + {"role": "system", "content": "You are a scientist."}, + {"role": "user", "content": "What is 212 degrees Fahrenheit converted to Celsius?"}, +) + +chat_completion_recorded_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "transaction_id": "transaction-id", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "is_response": True, + "ingest_source": "Python", + }, + ), +] + + +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events) +# One summary event, one system message, one user message, and one response message from the assistant +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + name="test_chat_completion_stream:test_openai_chat_completion_sync_in_txn_with_convo_id", + custom_metrics=[ + ("Python/ML/OpenAI/%s" % openai.__version__, 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +chat_completion_recorded_events_no_convo_id = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "transaction_id": "transaction-id", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openAI", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openAI", + "is_response": True, + "ingest_source": "Python", + }, + ), +] + + +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events_no_convo_id) +# One summary event, one system message, one user message, and one response message from the assistant +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + "test_chat_completion_stream:test_openai_chat_completion_sync_in_txn_no_convo_id", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@background_task() +def test_openai_chat_completion_sync_in_txn_no_convo_id(set_trace_info): + set_trace_info() + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_openai_chat_completion_sync_outside_txn(): + add_custom_attribute("llm.conversation_id", "my-awesome-id") + openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + + +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events_no_convo_id) +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + "test_chat_completion_stream:test_openai_chat_completion_async_conversation_id_unset", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@background_task() +def test_openai_chat_completion_async_conversation_id_unset(loop, set_trace_info): + set_trace_info() + + async def consumer(): + generator = await openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + async for resp in generator: + assert resp + + loop.run_until_complete(consumer()) + + +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events) +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + "test_chat_completion_stream:test_openai_chat_completion_async_conversation_id_set", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_transaction_metrics( + name="test_chat_completion_stream:test_openai_chat_completion_async_conversation_id_set", + custom_metrics=[ + ("Python/ML/OpenAI/%s" % openai.__version__, 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_openai_chat_completion_async_conversation_id_set(loop, set_trace_info): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + + async def consumer(): + generator = await openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + async for resp in generator: + assert resp + + loop.run_until_complete(consumer()) + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_openai_chat_completion_async_outside_transaction(loop): + async def consumer(): + generator = await openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + async for resp in generator: + assert resp + + loop.run_until_complete(consumer()) + + +def test_openai_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(): + assert openai.ChatCompletion._nr_wrapped + assert openai.util.convert_to_openai_object._nr_wrapped diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py new file mode 100644 index 0000000000..15fb1512dd --- /dev/null +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -0,0 +1,708 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import openai +import pytest +from testing_support.fixtures import ( + dt_enabled, + reset_core_stats_engine, + validate_custom_event_count, +) +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_error_trace_attributes import ( + validate_error_trace_attributes, +) +from testing_support.validators.validate_span_events import validate_span_events +from testing_support.validators.validate_transaction_metrics import ( + validate_transaction_metrics, +) + +from newrelic.api.background_task import background_task +from newrelic.api.transaction import add_custom_attribute +from newrelic.common.object_names import callable_name + +_test_openai_chat_completion_messages = ( + {"role": "system", "content": "You are a scientist."}, + {"role": "user", "content": "What is 212 degrees Fahrenheit converted to Celsius?"}, +) + +expected_events_on_no_model_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "my-awesome-id", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "", # No model in this test case + "response.organization": "", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 2, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "You are a scientist.", + "role": "system", + "response.model": "", + "completion_id": None, + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "response.model": "", + "sequence": 1, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.InvalidRequestError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "error.param": "engine", + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Must provide an 'engine' or 'model' parameter to create a ", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_no_model", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_no_model_error) +@validate_custom_event_count(count=3) +@background_task() +def test_chat_completion_invalid_request_error_no_model(set_trace_info): + with pytest.raises(openai.InvalidRequestError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = openai.ChatCompletion.create( + # no model provided, + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +expected_events_on_invalid_model_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "my-awesome-id", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "does-not-exist", + "response.organization": "", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 1, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "Model does not exist.", + "role": "user", + "response.model": "", + "completion_id": None, + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.InvalidRequestError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "error.code": "model_not_found", + "http.statusCode": 404, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "The model `does-not-exist` does not exist", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_invalid_model_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): + with pytest.raises(openai.InvalidRequestError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + generator = openai.ChatCompletion.create( + model="does-not-exist", + messages=({"role": "user", "content": "Model does not exist."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +expected_events_on_auth_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "my-awesome-id", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.organization": "", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 2, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "You are a scientist.", + "role": "system", + "response.model": "", + "completion_id": None, + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "my-awesome-id", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "response.model": "", + "sequence": 1, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": {}, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "No API key provided. You can set your API key in code using 'openai.api_key = ', or you can set the environment variable OPENAI_API_KEY=). If your API key is stored in a file, you can point the openai module at it with 'openai.api_key_path = '. You can generate API keys in the OpenAI web interface. See https://platform.openai.com/account/api-keys for details.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_authentication_error", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_auth_error) +@validate_custom_event_count(count=3) +@background_task() +def test_chat_completion_authentication_error(monkeypatch, set_trace_info): + with pytest.raises(openai.error.AuthenticationError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + monkeypatch.setattr(openai, "api_key", None) # openai.api_key = None + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +expected_events_on_wrong_api_key_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "sk-BEEF", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.organization": "", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 1, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "Invalid API key.", + "role": "user", + "completion_id": None, + "response.model": "", + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 401, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Incorrect API key provided: DEADBEEF. You can find your API key at https://platform.openai.com/account/api-keys.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_wrong_api_key_error", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_wrong_api_key_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_wrong_api_key_error(monkeypatch, set_trace_info): + with pytest.raises(openai.error.AuthenticationError): + set_trace_info() + monkeypatch.setattr(openai, "api_key", "DEADBEEF") + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=({"role": "user", "content": "Invalid API key."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.InvalidRequestError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "error.param": "engine", + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Must provide an 'engine' or 'model' parameter to create a ", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_no_model_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_no_model_error) +@validate_custom_event_count(count=3) +@background_task() +def test_chat_completion_invalid_request_error_no_model_async(loop, set_trace_info): + with pytest.raises(openai.InvalidRequestError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + loop.run_until_complete( + openai.ChatCompletion.acreate( + # no model provided, + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.InvalidRequestError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "error.code": "model_not_found", + "http.statusCode": 404, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "The model `does-not-exist` does not exist", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_invalid_model_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_trace_info): + with pytest.raises(openai.InvalidRequestError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + loop.run_until_complete( + openai.ChatCompletion.acreate( + model="does-not-exist", + messages=({"role": "user", "content": "Model does not exist."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": {}, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "No API key provided. You can set your API key in code using 'openai.api_key = ', or you can set the environment variable OPENAI_API_KEY=). If your API key is stored in a file, you can point the openai module at it with 'openai.api_key_path = '. You can generate API keys in the OpenAI web interface. See https://platform.openai.com/account/api-keys for details.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_authentication_error_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_auth_error) +@validate_custom_event_count(count=3) +@background_task() +def test_chat_completion_authentication_error_async(loop, monkeypatch, set_trace_info): + with pytest.raises(openai.error.AuthenticationError): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + monkeypatch.setattr(openai, "api_key", None) # openai.api_key = None + loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.AuthenticationError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 401, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": "Incorrect API key provided: DEADBEEF. You can find your API key at https://platform.openai.com/account/api-keys.", + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_wrong_api_key_error_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_on_wrong_api_key_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_info): + with pytest.raises(openai.error.AuthenticationError): + set_trace_info() + monkeypatch.setattr(openai, "api_key", "DEADBEEF") + loop.run_until_complete( + openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=({"role": "user", "content": "Invalid API key."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + ) + + +expected_events_stream_parsing_error = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "appName": "Python Agent Test (mlmodel_openai)", + "transaction_id": "transaction-id", + "conversation_id": "", + "span_id": None, + "trace_id": "trace-id", + "api_key_last_four_digits": "sk-CRET", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 1, + "vendor": "openAI", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "appName": "Python Agent Test (mlmodel_openai)", + "conversation_id": "", + "request_id": "", + "span_id": None, + "trace_id": "trace-id", + "transaction_id": "transaction-id", + "content": "Stream parsing error.", + "role": "user", + "completion_id": None, + "response.model": "", + "sequence": 0, + "vendor": "openAI", + "ingest_source": "Python", + }, + ), +] + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.APIError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 200, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": 'HTTP code 200 from API ({"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", "object": "chat.completion.chunk", "created": 1706565311, "model": "gpt-3.5-turbo-0613", "system_fingerprint": null, "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": null, "finish_reason": null}]}data: {"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", "object": "chat.completion.chunk", "created": 1706565311, "model": "gpt-3.5-turbo-0613", "system_fingerprint": null, "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": null, "finish_reason": null}]})', + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_stream_parsing_error_async", + scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], + rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], + background_task=True, +) +@validate_custom_events(expected_events_stream_parsing_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_stream_parsing_error_async(loop, monkeypatch, set_trace_info): + with pytest.raises(openai.error.APIError): + set_trace_info() + + async def consumer(): + generator = await openai.ChatCompletion.acreate( + model="gpt-3.5-turbo", + messages=({"role": "user", "content": "Stream parsing error."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + async for resp in generator: + assert resp + + loop.run_until_complete(consumer()) + + +@dt_enabled +@reset_core_stats_engine() +@validate_error_trace_attributes( + callable_name(openai.error.APIError), + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 200, + }, + }, +) +@validate_span_events( + exact_agents={ + "error.message": 'HTTP code 200 from API ({"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", "object": "chat.completion.chunk", "created": 1706565311, "model": "gpt-3.5-turbo-0613", "system_fingerprint": null, "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "logprobs": null, "finish_reason": null}]}data: {"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv", "object": "chat.completion.chunk", "created": 1706565311, "model": "gpt-3.5-turbo-0613", "system_fingerprint": null, "choices": [{"index": 0, "delta": {"content": "212"}, "logprobs": null, "finish_reason": null}]})', + } +) +@validate_transaction_metrics( + "test_chat_completion_stream_error:test_chat_completion_stream_parsing_error", + scoped_metrics=[("Llm/completion/OpenAI/create", 1)], + rollup_metrics=[("Llm/completion/OpenAI/create", 1)], + background_task=True, +) +@validate_custom_events(expected_events_stream_parsing_error) +@validate_custom_event_count(count=2) +@background_task() +def test_chat_completion_stream_parsing_error(monkeypatch, set_trace_info): + with pytest.raises(openai.error.APIError): + set_trace_info() + + generator = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=({"role": "user", "content": "Stream parsing error."},), + temperature=0.7, + max_tokens=100, + stream=True, + ) + for resp in generator: + assert resp diff --git a/tox.ini b/tox.ini index a3968eac15..0cd1abc4b3 100644 --- a/tox.ini +++ b/tox.ini @@ -146,7 +146,7 @@ envlist = python-mlmodel_openai-openai107-{py312}, python-mlmodel_openai-openailatest-{py37,py38,py39,py310,py311,py312,pypy38}, ; langchain dependency faiss-cpu isn't compatible with 3.12 yet. - python-mlmodel_langchain-{py38,py39,py310,py311,pypy38}, + python-mlmodel_langchain-{py39,py310,py311}, python-logger_logging-{py27,py37,py38,py39,py310,py311,py312,pypy27,pypy38}, python-logger_loguru-{py37,py38,py39,py310,py311,py312,pypy38}-logurulatest, python-logger_loguru-py39-loguru{06,05},