diff --git a/libs/ai-endpoints/Makefile b/libs/ai-endpoints/Makefile
index 52863145..d79ec9d7 100644
--- a/libs/ai-endpoints/Makefile
+++ b/libs/ai-endpoints/Makefile
@@ -33,7 +33,6 @@ lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test
lint lint_diff lint_package lint_tests:
- ./scripts/check_pydantic.sh .
./scripts/lint_imports.sh
poetry run ruff .
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
diff --git a/libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb b/libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb
index 4795e3c9..eb4cfdbc 100644
--- a/libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb
+++ b/libs/ai-endpoints/docs/chat/nvidia_ai_endpoints.ipynb
@@ -41,9 +41,7 @@
"id": "e13eb331",
"metadata": {},
"outputs": [],
- "source": [
- "%pip install --upgrade --quiet langchain-nvidia-ai-endpoints"
- ]
+ "source": ["%pip install --upgrade --quiet langchain-nvidia-ai-endpoints"]
},
{
"cell_type": "markdown",
@@ -71,18 +69,7 @@
"id": "686c4d2f",
"metadata": {},
"outputs": [],
- "source": [
- "import getpass\n",
- "import os\n",
- "\n",
- "# del os.environ['NVIDIA_API_KEY'] ## delete key and reset\n",
- "if os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n",
- " print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\n",
- "else:\n",
- " nvapi_key = getpass.getpass(\"NVAPI Key (starts with nvapi-): \")\n",
- " assert nvapi_key.startswith(\"nvapi-\"), f\"{nvapi_key[:5]}... is not a valid key\"\n",
- " os.environ[\"NVIDIA_API_KEY\"] = nvapi_key"
- ]
+ "source": ["import getpass\nimport os\n\n# del os.environ['NVIDIA_API_KEY'] ## delete key and reset\nif os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\nelse:\n nvapi_key = getpass.getpass(\"NVAPI Key (starts with nvapi-): \")\n assert nvapi_key.startswith(\"nvapi-\"), f\"{nvapi_key[:5]}... is not a valid key\"\n os.environ[\"NVIDIA_API_KEY\"] = nvapi_key"]
},
{
"cell_type": "markdown",
@@ -104,14 +91,7 @@
"outputId": "e9c4cc72-8db6-414b-d8e9-95de93fc5db4"
},
"outputs": [],
- "source": [
- "## Core LC Chat Interface\n",
- "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n",
- "\n",
- "llm = ChatNVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\")\n",
- "result = llm.invoke(\"Write a ballad about LangChain.\")\n",
- "print(result.content)"
- ]
+ "source": ["## Core LC Chat Interface\nfrom langchain_nvidia_ai_endpoints import ChatNVIDIA\n\nllm = ChatNVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\")\nresult = llm.invoke(\"Write a ballad about LangChain.\")\nprint(result.content)"]
},
{
"cell_type": "markdown",
@@ -130,12 +110,7 @@
"id": "49838930",
"metadata": {},
"outputs": [],
- "source": [
- "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n",
- "\n",
- "# connect to an embedding NIM running at localhost:8000, specifying a specific model\n",
- "llm = ChatNVIDIA(base_url=\"http://localhost:8000/v1\", model=\"meta/llama3-8b-instruct\")"
- ]
+ "source": ["from langchain_nvidia_ai_endpoints import ChatNVIDIA\n\n# connect to an embedding NIM running at localhost:8000, specifying a specific model\nllm = ChatNVIDIA(base_url=\"http://localhost:8000/v1\", model=\"meta/llama3-8b-instruct\")"]
},
{
"cell_type": "markdown",
@@ -153,11 +128,7 @@
"id": "01fa5095-be72-47b0-8247-e9fac799435d",
"metadata": {},
"outputs": [],
- "source": [
- "print(llm.batch([\"What's 2*3?\", \"What's 2*6?\"]))\n",
- "# Or via the async API\n",
- "# await llm.abatch([\"What's 2*3?\", \"What's 2*6?\"])"
- ]
+ "source": ["print(llm.batch([\"What's 2*3?\", \"What's 2*6?\"]))\n# Or via the async API\n# await llm.abatch([\"What's 2*3?\", \"What's 2*6?\"])"]
},
{
"cell_type": "code",
@@ -165,11 +136,7 @@
"id": "75189ac6-e13f-414f-9064-075c77d6e754",
"metadata": {},
"outputs": [],
- "source": [
- "for chunk in llm.stream(\"How far can a seagull fly in one day?\"):\n",
- " # Show the token separations\n",
- " print(chunk.content, end=\"|\")"
- ]
+ "source": ["for chunk in llm.stream(\"How far can a seagull fly in one day?\"):\n # Show the token separations\n print(chunk.content, end=\"|\")"]
},
{
"cell_type": "code",
@@ -177,12 +144,7 @@
"id": "8a9a4122-7a10-40c0-a979-82a769ce7f6a",
"metadata": {},
"outputs": [],
- "source": [
- "async for chunk in llm.astream(\n",
- " \"How long does it take for monarch butterflies to migrate?\"\n",
- "):\n",
- " print(chunk.content, end=\"|\")"
- ]
+ "source": ["async for chunk in llm.astream(\n \"How long does it take for monarch butterflies to migrate?\"\n):\n print(chunk.content, end=\"|\")"]
},
{
"cell_type": "markdown",
@@ -204,10 +166,7 @@
"id": "5b8a312d-38e9-4528-843e-59451bdadbac",
"metadata": {},
"outputs": [],
- "source": [
- "ChatNVIDIA.get_available_models()\n",
- "# llm.get_available_models()"
- ]
+ "source": ["ChatNVIDIA.get_available_models()\n# llm.get_available_models()"]
},
{
"cell_type": "markdown",
@@ -247,19 +206,7 @@
"id": "f5f7aee8-e90c-4d5a-ac97-0dd3d45c3f4c",
"metadata": {},
"outputs": [],
- "source": [
- "from langchain_core.output_parsers import StrOutputParser\n",
- "from langchain_core.prompts import ChatPromptTemplate\n",
- "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n",
- "\n",
- "prompt = ChatPromptTemplate.from_messages(\n",
- " [(\"system\", \"You are a helpful AI assistant named Fred.\"), (\"user\", \"{input}\")]\n",
- ")\n",
- "chain = prompt | ChatNVIDIA(model=\"meta/llama3-8b-instruct\") | StrOutputParser()\n",
- "\n",
- "for txt in chain.stream({\"input\": \"What's your name?\"}):\n",
- " print(txt, end=\"\")"
- ]
+ "source": ["from langchain_core.output_parsers import StrOutputParser\nfrom langchain_core.prompts import ChatPromptTemplate\nfrom langchain_nvidia_ai_endpoints import ChatNVIDIA\n\nprompt = ChatPromptTemplate.from_messages(\n [(\"system\", \"You are a helpful AI assistant named Fred.\"), (\"user\", \"{input}\")]\n)\nchain = prompt | ChatNVIDIA(model=\"meta/llama3-8b-instruct\") | StrOutputParser()\n\nfor txt in chain.stream({\"input\": \"What's your name?\"}):\n print(txt, end=\"\")"]
},
{
"cell_type": "markdown",
@@ -277,21 +224,7 @@
"id": "49aa569b-5f33-47b3-9edc-df58313eb038",
"metadata": {},
"outputs": [],
- "source": [
- "prompt = ChatPromptTemplate.from_messages(\n",
- " [\n",
- " (\n",
- " \"system\",\n",
- " \"You are an expert coding AI. Respond only in valid python; no narration whatsoever.\",\n",
- " ),\n",
- " (\"user\", \"{input}\"),\n",
- " ]\n",
- ")\n",
- "chain = prompt | ChatNVIDIA(model=\"meta/codellama-70b\") | StrOutputParser()\n",
- "\n",
- "for txt in chain.stream({\"input\": \"How do I solve this fizz buzz problem?\"}):\n",
- " print(txt, end=\"\")"
- ]
+ "source": ["prompt = ChatPromptTemplate.from_messages(\n [\n (\n \"system\",\n \"You are an expert coding AI. Respond only in valid python; no narration whatsoever.\",\n ),\n (\"user\", \"{input}\"),\n ]\n)\nchain = prompt | ChatNVIDIA(model=\"meta/codellama-70b\") | StrOutputParser()\n\nfor txt in chain.stream({\"input\": \"How do I solve this fizz buzz problem?\"}):\n print(txt, end=\"\")"]
},
{
"cell_type": "markdown",
@@ -311,15 +244,7 @@
"id": "26625437-1695-440f-b792-b85e6add9a90",
"metadata": {},
"outputs": [],
- "source": [
- "import IPython\n",
- "import requests\n",
- "\n",
- "image_url = \"https://www.nvidia.com/content/dam/en-zz/Solutions/research/ai-playground/nvidia-picasso-3c33-p@2x.jpg\" ## Large Image\n",
- "image_content = requests.get(image_url).content\n",
- "\n",
- "IPython.display.Image(image_content)"
- ]
+ "source": ["import IPython\nimport requests\n\nimage_url = \"https://www.nvidia.com/content/dam/en-zz/Solutions/research/ai-playground/nvidia-picasso-3c33-p@2x.jpg\" ## Large Image\nimage_content = requests.get(image_url).content\n\nIPython.display.Image(image_content)"]
},
{
"cell_type": "code",
@@ -327,11 +252,7 @@
"id": "dfbbe57c-27a5-4cbb-b967-19c4e7d29fd0",
"metadata": {},
"outputs": [],
- "source": [
- "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n",
- "\n",
- "llm = ChatNVIDIA(model=\"nvidia/neva-22b\")"
- ]
+ "source": ["from langchain_nvidia_ai_endpoints import ChatNVIDIA\n\nllm = ChatNVIDIA(model=\"nvidia/neva-22b\")"]
},
{
"cell_type": "markdown",
@@ -347,15 +268,76 @@
"id": "432ea2a2-4d39-43f8-a236-041294171f14",
"metadata": {},
"outputs": [],
+ "source": ["from langchain_core.messages import HumanMessage\n\nllm.invoke(\n [\n HumanMessage(\n content=[\n {\"type\": \"text\", \"text\": \"Describe this image:\"},\n {\"type\": \"image_url\", \"image_url\": {\"url\": image_url}},\n ]\n )\n ]\n)"]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "25e8db7c",
+ "metadata": {},
"source": [
- "from langchain_core.messages import HumanMessage\n",
+ "#### Passing an image as an NVCF asset\n",
+ "\n",
+ "If your image is sufficiently large or you will pass it multiple times in a chat conversation, you may upload it once and reference it in your chat conversation.\n",
"\n",
+ "See https://docs.nvidia.com/cloud-functions/user-guide/latest/cloud-function/assets.html for details about how upload the image."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "091f7fce",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "\n",
+ "content_type = \"image/jpg\"\n",
+ "description = \"example-image-from-lc-nv-ai-e-notebook\"\n",
+ "\n",
+ "create_response = requests.post(\n",
+ " \"https://api.nvcf.nvidia.com/v2/nvcf/assets\",\n",
+ " headers={\n",
+ " \"Authorization\": f\"Bearer {os.environ['NVIDIA_API_KEY']}\",\n",
+ " \"accept\": \"application/json\",\n",
+ " \"Content-Type\": \"application/json\",\n",
+ " },\n",
+ " json={\n",
+ " \"contentType\": content_type,\n",
+ " \"description\": description\n",
+ " }\n",
+ ")\n",
+ "create_response.raise_for_status()\n",
+ "\n",
+ "upload_response = requests.put(\n",
+ " create_response.json()[\"uploadUrl\"],\n",
+ " headers={\n",
+ " \"Content-Type\": content_type,\n",
+ " \"x-amz-meta-nvcf-asset-description\": description,\n",
+ " },\n",
+ " data=image_content,\n",
+ ")\n",
+ "upload_response.raise_for_status()\n",
+ "\n",
+ "asset_id = create_response.json()[\"assetId\"]\n",
+ "asset_id"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5c24be59",
+ "metadata": {},
+ "outputs": [],
+ "source": [
"llm.invoke(\n",
" [\n",
" HumanMessage(\n",
" content=[\n",
- " {\"type\": \"text\", \"text\": \"Describe this image:\"},\n",
- " {\"type\": \"image_url\", \"image_url\": {\"url\": image_url}},\n",
+ " {\"type\": \"text\", \"text\": \"Describe this image\"},\n",
+ " {\n",
+ " \"type\": \"image_url\",\n",
+ " \"image_url\": {\"url\": f\"data:{content_type};asset_id,{asset_id}\"},\n",
+ " },\n",
" ]\n",
" )\n",
" ]\n",
@@ -384,15 +366,7 @@
"id": "c58f1dd0",
"metadata": {},
"outputs": [],
- "source": [
- "import IPython\n",
- "import requests\n",
- "\n",
- "image_url = \"https://picsum.photos/seed/kitten/300/200\"\n",
- "image_content = requests.get(image_url).content\n",
- "\n",
- "IPython.display.Image(image_content)"
- ]
+ "source": ["import IPython\nimport requests\n\nimage_url = \"https://picsum.photos/seed/kitten/300/200\"\nimage_content = requests.get(image_url).content\n\nIPython.display.Image(image_content)"]
},
{
"cell_type": "code",
@@ -400,28 +374,7 @@
"id": "8c721629-42eb-4006-bf68-0296f7925ebc",
"metadata": {},
"outputs": [],
- "source": [
- "import base64\n",
- "\n",
- "from langchain_core.messages import HumanMessage\n",
- "\n",
- "## Works for simpler images. For larger images, see actual implementation\n",
- "b64_string = base64.b64encode(image_content).decode(\"utf-8\")\n",
- "\n",
- "llm.invoke(\n",
- " [\n",
- " HumanMessage(\n",
- " content=[\n",
- " {\"type\": \"text\", \"text\": \"Describe this image:\"},\n",
- " {\n",
- " \"type\": \"image_url\",\n",
- " \"image_url\": {\"url\": f\"data:image/png;base64,{b64_string}\"},\n",
- " },\n",
- " ]\n",
- " )\n",
- " ]\n",
- ")"
- ]
+ "source": ["import base64\n\nfrom langchain_core.messages import HumanMessage\n\n## Works for simpler images. For larger images, see actual implementation\nb64_string = base64.b64encode(image_content).decode(\"utf-8\")\n\nllm.invoke(\n [\n HumanMessage(\n content=[\n {\"type\": \"text\", \"text\": \"Describe this image:\"},\n {\n \"type\": \"image_url\",\n \"image_url\": {\"url\": f\"data:image/png;base64,{b64_string}\"},\n },\n ]\n )\n ]\n)"]
},
{
"cell_type": "markdown",
@@ -439,10 +392,7 @@
"id": "00c06a9a-497b-4192-a842-b075e27401aa",
"metadata": {},
"outputs": [],
- "source": [
- "base64_with_mime_type = f\"data:image/png;base64,{b64_string}\"\n",
- "llm.invoke(f'What\\'s in this image?\\n')"
- ]
+ "source": ["base64_with_mime_type = f\"data:image/png;base64,{b64_string}\"\nllm.invoke(f'What\\'s in this image?\\n')"]
},
{
"cell_type": "markdown",
@@ -470,9 +420,7 @@
"id": "082ccb21-91e1-4e71-a9ba-4bff1e89f105",
"metadata": {},
"outputs": [],
- "source": [
- "%pip install --upgrade --quiet langchain"
- ]
+ "source": ["%pip install --upgrade --quiet langchain"]
},
{
"cell_type": "code",
@@ -482,41 +430,7 @@
"id": "fd2c6bc1"
},
"outputs": [],
- "source": [
- "from langchain_core.chat_history import InMemoryChatMessageHistory\n",
- "from langchain_core.runnables.history import RunnableWithMessageHistory\n",
- "\n",
- "# store is a dictionary that maps session IDs to their corresponding chat histories.\n",
- "store = {} # memory is maintained outside the chain\n",
- "\n",
- "\n",
- "# A function that returns the chat history for a given session ID.\n",
- "def get_session_history(session_id: str) -> InMemoryChatMessageHistory:\n",
- " if session_id not in store:\n",
- " store[session_id] = InMemoryChatMessageHistory()\n",
- " return store[session_id]\n",
- "\n",
- "\n",
- "chat = ChatNVIDIA(\n",
- " model=\"mistralai/mixtral-8x22b-instruct-v0.1\",\n",
- " temperature=0.1,\n",
- " max_tokens=100,\n",
- " top_p=1.0,\n",
- ")\n",
- "\n",
- "# Define a RunnableConfig object, with a `configurable` key. session_id determines thread\n",
- "config = {\"configurable\": {\"session_id\": \"1\"}}\n",
- "\n",
- "conversation = RunnableWithMessageHistory(\n",
- " chat,\n",
- " get_session_history,\n",
- ")\n",
- "\n",
- "conversation.invoke(\n",
- " \"Hi I'm Srijan Dubey.\", # input or query\n",
- " config=config,\n",
- ")"
- ]
+ "source": ["from langchain_core.chat_history import InMemoryChatMessageHistory\nfrom langchain_core.runnables.history import RunnableWithMessageHistory\n\n# store is a dictionary that maps session IDs to their corresponding chat histories.\nstore = {} # memory is maintained outside the chain\n\n\n# A function that returns the chat history for a given session ID.\ndef get_session_history(session_id: str) -> InMemoryChatMessageHistory:\n if session_id not in store:\n store[session_id] = InMemoryChatMessageHistory()\n return store[session_id]\n\n\nchat = ChatNVIDIA(\n model=\"mistralai/mixtral-8x22b-instruct-v0.1\",\n temperature=0.1,\n max_tokens=100,\n top_p=1.0,\n)\n\n# Define a RunnableConfig object, with a `configurable` key. session_id determines thread\nconfig = {\"configurable\": {\"session_id\": \"1\"}}\n\nconversation = RunnableWithMessageHistory(\n chat,\n get_session_history,\n)\n\nconversation.invoke(\n \"Hi I'm Srijan Dubey.\", # input or query\n config=config,\n)"]
},
{
"cell_type": "code",
@@ -531,12 +445,7 @@
"outputId": "79acc89d-a820-4f2c-bac2-afe99da95580"
},
"outputs": [],
- "source": [
- "conversation.invoke(\n",
- " \"I'm doing well! Just having a conversation with an AI.\",\n",
- " config=config,\n",
- ")"
- ]
+ "source": ["conversation.invoke(\n \"I'm doing well! Just having a conversation with an AI.\",\n config=config,\n)"]
},
{
"cell_type": "code",
@@ -551,12 +460,7 @@
"outputId": "a1714513-a8fd-4d14-f974-233e39d5c4f5"
},
"outputs": [],
- "source": [
- "conversation.invoke(\n",
- " \"Tell me about yourself.\",\n",
- " config=config,\n",
- ")"
- ]
+ "source": ["conversation.invoke(\n \"Tell me about yourself.\",\n config=config,\n)"]
},
{
"cell_type": "markdown",
@@ -584,10 +488,7 @@
"id": "e36c8911",
"metadata": {},
"outputs": [],
- "source": [
- "tool_models = [model for model in ChatNVIDIA.get_available_models() if model.supports_tools]\n",
- "tool_models"
- ]
+ "source": ["tool_models = [model for model in ChatNVIDIA.get_available_models() if model.supports_tools]\ntool_models"]
},
{
"cell_type": "markdown",
@@ -603,21 +504,7 @@
"id": "bd54f174",
"metadata": {},
"outputs": [],
- "source": [
- "from langchain_core.pydantic_v1 import Field\n",
- "from langchain_core.tools import tool\n",
- "\n",
- "@tool\n",
- "def get_current_weather(\n",
- " location: str = Field(..., description=\"The location to get the weather for.\")\n",
- "):\n",
- " \"\"\"Get the current weather for a location.\"\"\"\n",
- " ...\n",
- "\n",
- "llm = ChatNVIDIA(model=tool_models[0].id).bind_tools(tools=[get_current_weather])\n",
- "response = llm.invoke(\"What is the weather in Boston?\")\n",
- "response.tool_calls"
- ]
+ "source": ["from pydantic import Field\nfrom langchain_core.tools import tool\n\n@tool\ndef get_current_weather(\n location: str = Field(..., description=\"The location to get the weather for.\")\n):\n \"\"\"Get the current weather for a location.\"\"\"\n ...\n\nllm = ChatNVIDIA(model=tool_models[0].id).bind_tools(tools=[get_current_weather])\nresponse = llm.invoke(\"What is the weather in Boston?\")\nresponse.tool_calls"]
},
{
"cell_type": "markdown",
@@ -655,11 +542,7 @@
"id": "0515f558",
"metadata": {},
"outputs": [],
- "source": [
- "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n",
- "structured_models = [model for model in ChatNVIDIA.get_available_models() if model.supports_structured_output]\n",
- "structured_models"
- ]
+ "source": ["from langchain_nvidia_ai_endpoints import ChatNVIDIA\nstructured_models = [model for model in ChatNVIDIA.get_available_models() if model.supports_structured_output]\nstructured_models"]
},
{
"cell_type": "markdown",
@@ -675,17 +558,7 @@
"id": "482c37e8",
"metadata": {},
"outputs": [],
- "source": [
- "from langchain_core.pydantic_v1 import BaseModel, Field\n",
- "\n",
- "class Person(BaseModel):\n",
- " first_name: str = Field(..., description=\"The person's first name.\")\n",
- " last_name: str = Field(..., description=\"The person's last name.\")\n",
- "\n",
- "llm = ChatNVIDIA(model=structured_models[0].id).with_structured_output(Person)\n",
- "response = llm.invoke(\"Who is Michael Jeffrey Jordon?\")\n",
- "response"
- ]
+ "source": ["from pydantic import BaseModel, Field\n\nclass Person(BaseModel):\n first_name: str = Field(..., description=\"The person's first name.\")\n last_name: str = Field(..., description=\"The person's last name.\")\n\nllm = ChatNVIDIA(model=structured_models[0].id).with_structured_output(Person)\nresponse = llm.invoke(\"Who is Michael Jeffrey Jordon?\")\nresponse"]
},
{
"cell_type": "markdown",
@@ -701,24 +574,7 @@
"id": "7f802912",
"metadata": {},
"outputs": [],
- "source": [
- "from enum import Enum\n",
- "\n",
- "class Choices(Enum):\n",
- " A = \"A\"\n",
- " B = \"B\"\n",
- " C = \"C\"\n",
- "\n",
- "llm = ChatNVIDIA(model=structured_models[2].id).with_structured_output(Choices)\n",
- "response = llm.invoke(\"\"\"\n",
- " What does 1+1 equal?\n",
- " A. -100\n",
- " B. 2\n",
- " C. doorstop\n",
- " \"\"\"\n",
- ")\n",
- "response"
- ]
+ "source": ["from enum import Enum\n\nclass Choices(Enum):\n A = \"A\"\n B = \"B\"\n C = \"C\"\n\nllm = ChatNVIDIA(model=structured_models[2].id).with_structured_output(Choices)\nresponse = llm.invoke(\"\"\"\n What does 1+1 equal?\n A. -100\n B. 2\n C. doorstop\n \"\"\"\n)\nresponse"]
},
{
"cell_type": "code",
@@ -726,19 +582,7 @@
"id": "02b7ef29",
"metadata": {},
"outputs": [],
- "source": [
- "model = structured_models[3].id\n",
- "llm = ChatNVIDIA(model=model).with_structured_output(Choices)\n",
- "print(model)\n",
- "response = llm.invoke(\"\"\"\n",
- " What does 1+1 equal?\n",
- " A. -100\n",
- " B. 2\n",
- " C. doorstop\n",
- " \"\"\"\n",
- ")\n",
- "response"
- ]
+ "source": ["model = structured_models[3].id\nllm = ChatNVIDIA(model=model).with_structured_output(Choices)\nprint(model)\nresponse = llm.invoke(\"\"\"\n What does 1+1 equal?\n A. -100\n B. 2\n C. doorstop\n \"\"\"\n)\nresponse"]
}
],
"metadata": {
diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
index 019cdb01..218a0ab4 100644
--- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
+++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -20,13 +20,13 @@
from urllib.parse import urlparse, urlunparse
import requests
-from langchain_core.pydantic_v1 import (
+from pydantic import (
BaseModel,
+ ConfigDict,
Field,
PrivateAttr,
SecretStr,
- root_validator,
- validator,
+ field_validator,
)
from requests.models import Response
@@ -34,6 +34,9 @@
logger = logging.getLogger(__name__)
+_API_KEY_VAR = "NVIDIA_API_KEY"
+_BASE_URL_VAR = "NVIDIA_BASE_URL"
+
class _NVIDIAClient(BaseModel):
"""
@@ -41,20 +44,23 @@ class _NVIDIAClient(BaseModel):
"""
default_hosted_model_name: str = Field(..., description="Default model name to use")
- model_name: Optional[str] = Field(..., description="Name of the model to invoke")
+ # "mdl_name" because "model_" is a protected namespace in pydantic
+ mdl_name: Optional[str] = Field(..., description="Name of the model to invoke")
model: Optional[Model] = Field(None, description="The model to invoke")
is_hosted: bool = Field(True)
cls: str = Field(..., description="Class Name")
# todo: add a validator for requests.Response (last_response attribute) and
# remove arbitrary_types_allowed=True
- class Config:
- arbitrary_types_allowed = True
+ model_config = ConfigDict(
+ arbitrary_types_allowed=True,
+ )
## Core defaults. These probably should not be changed
- _api_key_var = "NVIDIA_API_KEY"
base_url: str = Field(
- ...,
+ default_factory=lambda: os.getenv(
+ _BASE_URL_VAR, "https://integrate.api.nvidia.com/v1"
+ ),
description="Base URL for standard inference",
)
infer_path: str = Field(
@@ -71,13 +77,28 @@ class Config:
)
get_session_fn: Callable = Field(requests.Session)
- api_key: Optional[SecretStr] = Field(description="API Key for service of choice")
+ api_key: Optional[SecretStr] = Field(
+ default_factory=lambda: SecretStr(
+ os.getenv(_API_KEY_VAR, "INTERNAL_LCNVAIE_ERROR")
+ )
+ if _API_KEY_VAR in os.environ
+ else None,
+ description="API Key for service of choice",
+ )
## Generation arguments
- timeout: float = Field(60, ge=0, description="Timeout for waiting on response (s)")
- interval: float = Field(0.02, ge=0, description="Interval for pulling response")
+ timeout: float = Field(
+ 60,
+ ge=0,
+ description="The minimum amount of time (in sec) to poll after a 202 response",
+ )
+ interval: float = Field(
+ 0.02,
+ ge=0,
+ description="Interval (in sec) between polling attempts after a 202 response",
+ )
last_inputs: Optional[dict] = Field(
- description="Last inputs sent over to the server"
+ default={}, description="Last inputs sent over to the server"
)
last_response: Response = Field(
None, description="Last response sent from the server"
@@ -103,47 +124,25 @@ class Config:
###################################################################################
################### Validation and Initialization #################################
- @validator("base_url")
+ @field_validator("base_url")
def _validate_base_url(cls, v: str) -> str:
+ ## Making sure /v1 is added to the url
if v is not None:
- result = urlparse(v)
- expected_format = "Expected format is 'http://host:port'."
- # Ensure scheme and netloc (domain name) are present
- if not (result.scheme and result.netloc):
- raise ValueError(f"Invalid base_url format. {expected_format} Got: {v}")
- return v
-
- @root_validator(pre=True)
- def _preprocess_args(cls, values: Dict[str, Any]) -> Dict[str, Any]:
- # if api_key is not provided or None,
- # try to get it from the environment
- # we can't use Field(default_factory=...)
- # because construction may happen with api_key=None
- if values.get("api_key") is None:
- values["api_key"] = os.getenv(cls._api_key_var)
-
- ## Making sure /v1 in added to the url, followed by infer_path
- if "base_url" in values:
- base_url = values["base_url"].strip("/")
- parsed = urlparse(base_url)
- expected_format = "Expected format is: http://host:port"
+ parsed = urlparse(v)
+ # Ensure scheme and netloc (domain name) are present
if not (parsed.scheme and parsed.netloc):
- raise ValueError(
- f"Invalid base_url format. {expected_format} Got: {base_url}"
- )
+ expected_format = "Expected format is: http://host:port"
+ raise ValueError(f"Invalid base_url format. {expected_format} Got: {v}")
- if base_url.endswith(
+ if v.strip("/").endswith(
("/embeddings", "/completions", "/rankings", "/reranking")
):
- warnings.warn(f"Using {base_url}, ignoring the rest")
+ warnings.warn(f"Using {v}, ignoring the rest")
- values["base_url"] = base_url = urlunparse(
- (parsed.scheme, parsed.netloc, "v1", None, None, None)
- )
- values["infer_path"] = values["infer_path"].format(base_url=base_url)
+ v = urlunparse((parsed.scheme, parsed.netloc, "v1", None, None, None))
- return values
+ return v
# final validation after model is constructed
# todo: when pydantic v2 is available,
@@ -165,10 +164,10 @@ def __init__(self, **kwargs: Any):
)
# set default model for hosted endpoint
- if not self.model_name:
- self.model_name = self.default_hosted_model_name
+ if not self.mdl_name:
+ self.mdl_name = self.default_hosted_model_name
- if model := determine_model(self.model_name):
+ if model := determine_model(self.mdl_name):
if not model.client:
warnings.warn(f"Unable to determine validity of {model.id}")
elif model.client != self.cls:
@@ -186,27 +185,27 @@ def __init__(self, **kwargs: Any):
candidates = [
model
for model in self.available_models
- if model.id == self.model_name
+ if model.id == self.mdl_name
]
assert len(candidates) <= 1, (
- f"Multiple candidates for {self.model_name} "
+ f"Multiple candidates for {self.mdl_name} "
f"in `available_models`: {candidates}"
)
if candidates:
model = candidates[0]
warnings.warn(
- f"Found {self.model_name} in available_models, but type is "
+ f"Found {self.mdl_name} in available_models, but type is "
"unknown and inference may fail."
)
else:
raise ValueError(
- f"Model {self.model_name} is unknown, check `available_models`"
+ f"Model {self.mdl_name} is unknown, check `available_models`"
)
self.model = model
- self.model_name = self.model.id # name may change because of aliasing
+ self.mdl_name = self.model.id # name may change because of aliasing
else:
# set default model
- if not self.model_name:
+ if not self.mdl_name:
valid_models = [
model
for model in self.available_models
@@ -214,9 +213,9 @@ def __init__(self, **kwargs: Any):
]
self.model = next(iter(valid_models), None)
if self.model:
- self.model_name = self.model.id
+ self.mdl_name = self.model.id
warnings.warn(
- f"Default model is set as: {self.model_name}. \n"
+ f"Default model is set as: {self.mdl_name}. \n"
"Set model using model parameter. \n"
"To get available models use available_models property.",
UserWarning,
@@ -233,15 +232,15 @@ def is_lc_serializable(cls) -> bool:
@property
def lc_secrets(self) -> Dict[str, str]:
- return {"api_key": self._api_key_var}
+ return {"api_key": _API_KEY_VAR}
@property
def lc_attributes(self) -> Dict[str, Any]:
attributes: Dict[str, Any] = {}
attributes["base_url"] = self.base_url
- if self.model_name:
- attributes["model"] = self.model_name
+ if self.mdl_name:
+ attributes["model"] = self.mdl_name
return attributes
@@ -332,11 +331,15 @@ def _post(
self,
invoke_url: str,
payload: Optional[dict] = {},
+ extra_headers: dict = {},
) -> Tuple[Response, requests.Session]:
"""Method for posting to the AI Foundation Model Function API."""
self.last_inputs = {
"url": invoke_url,
- "headers": self.headers_tmpl["call"],
+ "headers": {
+ **self.headers_tmpl["call"],
+ **extra_headers,
+ },
"json": payload,
}
session = self.get_session_fn()
@@ -372,9 +375,7 @@ def _wait(self, response: Response, session: requests.Session) -> Response:
start_time = time.time()
# note: the local NIM does not return a 202 status code
# (per RL 22may2024 circa 24.05)
- while (
- response.status_code == 202
- ): # todo: there are no tests that reach this point
+ while response.status_code == 202:
time.sleep(self.interval)
if (time.time() - start_time) > self.timeout:
raise TimeoutError(
@@ -385,10 +386,12 @@ def _wait(self, response: Response, session: requests.Session) -> Response:
"NVCF-REQID" in response.headers
), "Received 202 response with no request id to follow"
request_id = response.headers.get("NVCF-REQID")
- # todo: this needs testing, missing auth header update
+ payload = {
+ "url": self.polling_url_tmpl.format(request_id=request_id),
+ "headers": self.headers_tmpl["call"],
+ }
self.last_response = response = session.get(
- self.polling_url_tmpl.format(request_id=request_id),
- headers=self.headers_tmpl["call"],
+ **self.__add_authorization(payload)
)
self._try_raise(response)
return response
@@ -444,9 +447,12 @@ def _try_raise(self, response: Response) -> None:
def get_req(
self,
payload: dict = {},
+ extra_headers: dict = {},
) -> Response:
"""Post to the API."""
- response, session = self._post(self.infer_url, payload)
+ response, session = self._post(
+ self.infer_url, payload, extra_headers=extra_headers
+ )
return self._wait(response, session)
def postprocess(
@@ -485,7 +491,10 @@ def _aggregate_msgs(self, msg_list: Sequence[dict]) -> Tuple[dict, bool]:
usage_holder = msg.get("usage", {}) ####
if "choices" in msg:
## Tease out ['choices'][0]...['delta'/'message']
- msg = msg.get("choices", [{}])[0]
+ # when streaming w/ usage info, we may get a response
+ # w/ choices: [] that includes final usage info
+ choices = msg.get("choices", [{}])
+ msg = choices[0] if choices else {}
# todo: this needs to be fixed, the fact we only
# use the first choice breaks the interface
finish_reason_holder = msg.get("finish_reason", None)
@@ -517,10 +526,14 @@ def _aggregate_msgs(self, msg_list: Sequence[dict]) -> Tuple[dict, bool]:
def get_req_stream(
self,
payload: dict,
+ extra_headers: dict = {},
) -> Iterator[Dict]:
self.last_inputs = {
"url": self.infer_url,
- "headers": self.headers_tmpl["stream"],
+ "headers": {
+ **self.headers_tmpl["stream"],
+ **extra_headers,
+ },
"json": payload,
}
@@ -528,7 +541,7 @@ def get_req_stream(
stream=True, **self.__add_authorization(self.last_inputs)
)
self._try_raise(response)
- call = self.copy()
+ call: _NVIDIAClient = self.model_copy()
def out_gen() -> Generator[dict, Any, Any]:
## Good for client, since it allows self.last_inputs
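
Taken together, the new default_factory fields mean both NVIDIA_API_KEY and NVIDIA_BASE_URL are read at construction time. A minimal sketch of the resulting behavior, assuming a NIM is reachable at the configured address:

# Illustrative sketch, not part of the diff: env vars now feed the defaults.
import os

os.environ["NVIDIA_BASE_URL"] = "http://localhost:8000/v1"
os.environ["NVIDIA_API_KEY"] = "nvapi-example"  # placeholder key

from langchain_nvidia_ai_endpoints import ChatNVIDIA

llm = ChatNVIDIA()  # no explicit base_url or api_key needed
# The base_url field_validator still rewrites URLs that end in a known
# inference suffix, e.g. http://host:8000/v1/completions -> http://host:8000/v1,
# emitting a warning that the extra path is ignored.
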
diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
index 2f00b4ce..edd2014a 100644
--- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
+++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
@@ -2,7 +2,7 @@
import warnings
from typing import Literal, Optional
-from langchain_core.pydantic_v1 import BaseModel, validator
+from pydantic import BaseModel, model_validator
class Model(BaseModel):
@@ -23,7 +23,7 @@ class Model(BaseModel):
id: str
# why do we have a model_type? because ChatNVIDIA can speak both chat and vlm.
model_type: Optional[
- Literal["chat", "vlm", "embedding", "ranking", "completions", "qa"]
+ Literal["chat", "vlm", "nv-vlm", "embedding", "ranking", "completions", "qa"]
] = None
client: Optional[
Literal["ChatNVIDIA", "NVIDIAEmbeddings", "NVIDIARerank", "NVIDIA"]
@@ -37,21 +37,21 @@ class Model(BaseModel):
def __hash__(self) -> int:
return hash(self.id)
- @validator("client", always=True)
- def validate_client(cls, client: str, values: dict) -> str:
- if client:
+ @model_validator(mode="after")
+ def validate_client(self) -> "Model":
+ if self.client:
supported = {
- "ChatNVIDIA": ("chat", "vlm", "qa"),
+ "ChatNVIDIA": ("chat", "vlm", "nv-vlm", "qa"),
"NVIDIAEmbeddings": ("embedding",),
"NVIDIARerank": ("ranking",),
"NVIDIA": ("completions",),
}
- model_type = values.get("model_type")
- if model_type not in supported[client]:
+ if self.model_type not in supported.get(self.client, ()):
raise ValueError(
- f"Model type '{model_type}' not supported by client '{client}'"
+ f"Model type '{self.model_type}' not supported "
+ f"by client '{self.client}'"
)
- return client
+ return self
CHAT_MODEL_TABLE = {
@@ -427,63 +427,56 @@ def validate_client(cls, client: str, values: dict) -> str:
VLM_MODEL_TABLE = {
"adept/fuyu-8b": Model(
id="adept/fuyu-8b",
- model_type="vlm",
+ model_type="nv-vlm",
client="ChatNVIDIA",
endpoint="https://ai.api.nvidia.com/v1/vlm/adept/fuyu-8b",
aliases=["ai-fuyu-8b", "playground_fuyu_8b", "fuyu_8b"],
),
"google/deplot": Model(
id="google/deplot",
- model_type="vlm",
+ model_type="nv-vlm",
client="ChatNVIDIA",
endpoint="https://ai.api.nvidia.com/v1/vlm/google/deplot",
aliases=["ai-google-deplot", "playground_deplot", "deplot"],
),
"microsoft/kosmos-2": Model(
id="microsoft/kosmos-2",
- model_type="vlm",
+ model_type="nv-vlm",
client="ChatNVIDIA",
endpoint="https://ai.api.nvidia.com/v1/vlm/microsoft/kosmos-2",
aliases=["ai-microsoft-kosmos-2", "playground_kosmos_2", "kosmos_2"],
),
"nvidia/neva-22b": Model(
id="nvidia/neva-22b",
- model_type="vlm",
+ model_type="nv-vlm",
client="ChatNVIDIA",
endpoint="https://ai.api.nvidia.com/v1/vlm/nvidia/neva-22b",
aliases=["ai-neva-22b", "playground_neva_22b", "neva_22b"],
),
"google/paligemma": Model(
id="google/paligemma",
- model_type="vlm",
+ model_type="nv-vlm",
client="ChatNVIDIA",
endpoint="https://ai.api.nvidia.com/v1/vlm/google/paligemma",
aliases=["ai-google-paligemma"],
),
"microsoft/phi-3-vision-128k-instruct": Model(
id="microsoft/phi-3-vision-128k-instruct",
- model_type="vlm",
+ model_type="nv-vlm",
client="ChatNVIDIA",
endpoint="https://ai.api.nvidia.com/v1/vlm/microsoft/phi-3-vision-128k-instruct",
aliases=["ai-phi-3-vision-128k-instruct"],
),
- "liuhaotian/llava-v1.6-mistral-7b": Model(
- id="liuhaotian/llava-v1.6-mistral-7b",
+ "microsoft/phi-3.5-vision-instruct": Model(
+ id="microsoft/phi-3.5-vision-instruct",
model_type="vlm",
client="ChatNVIDIA",
- endpoint="https://ai.api.nvidia.com/v1/stg/vlm/community/llava16-mistral-7b",
- aliases=[
- "ai-llava16-mistral-7b",
- "community/llava16-mistral-7b",
- "liuhaotian/llava16-mistral-7b",
- ],
),
- "liuhaotian/llava-v1.6-34b": Model(
- id="liuhaotian/llava-v1.6-34b",
+ "nvidia/vila": Model(
+ id="nvidia/vila",
model_type="vlm",
client="ChatNVIDIA",
- endpoint="https://ai.api.nvidia.com/v1/stg/vlm/community/llava16-34b",
- aliases=["ai-llava16-34b", "community/llava16-34b", "liuhaotian/llava16-34b"],
+ endpoint="https://ai.api.nvidia.com/v1/vlm/nvidia/vila",
),
}
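
The validator change above follows the standard pydantic v1 -> v2 migration: cross-field checks move from @validator(..., always=True) reading a values dict to @model_validator(mode="after") reading the constructed instance. A simplified, self-contained sketch of the pattern (ExampleModel is a stand-in, not the library's Model class):

from typing import Optional

from pydantic import BaseModel, model_validator


class ExampleModel(BaseModel):
    kind: Optional[str] = None  # stands in for model_type
    client: Optional[str] = None

    @model_validator(mode="after")
    def validate_client(self) -> "ExampleModel":
        # mode="after" runs once all fields are set, so cross-field checks
        # read attributes directly instead of a partially-built values dict
        supported = {"ChatNVIDIA": ("chat", "vlm", "nv-vlm", "qa")}
        if self.client and self.kind not in supported.get(self.client, ()):
            raise ValueError(
                f"Kind '{self.kind}' not supported by client '{self.client}'"
            )
        return self


ExampleModel(kind="chat", client="ChatNVIDIA")  # passes
# ExampleModel(kind="ranking", client="ChatNVIDIA") raises ValueError
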
diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py
index 42d825eb..28018dcc 100644
--- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py
+++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py
@@ -4,10 +4,9 @@
import base64
import enum
-import io
import logging
import os
-import sys
+import re
import urllib.parse
import warnings
from typing import (
@@ -19,17 +18,18 @@
Literal,
Optional,
Sequence,
+ Tuple,
Type,
Union,
)
-import requests
from langchain_core.callbacks.manager import (
AsyncCallbackManagerForLLMRun,
CallbackManagerForLLMRun,
)
from langchain_core.exceptions import OutputParserException
from langchain_core.language_models import BaseChatModel, LanguageModelInput
+from langchain_core.language_models.chat_models import LangSmithParams
from langchain_core.messages import (
AIMessage,
AIMessageChunk,
@@ -46,26 +46,17 @@
ChatResult,
Generation,
)
-from langchain_core.pydantic_v1 import BaseModel, Field, PrivateAttr, root_validator
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
from langchain_core.utils.function_calling import convert_to_openai_tool
from langchain_core.utils.pydantic import is_basemodel_subclass
+from pydantic import BaseModel, Field, PrivateAttr
from langchain_nvidia_ai_endpoints._common import _NVIDIAClient
from langchain_nvidia_ai_endpoints._statics import Model
from langchain_nvidia_ai_endpoints._utils import convert_message_to_dict
_CallbackManager = Union[AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun]
-_DictOrPydanticOrEnumClass = Union[Dict[str, Any], Type[BaseModel], Type[enum.Enum]]
-_DictOrPydanticOrEnum = Union[Dict, BaseModel, enum.Enum]
-
-try:
- import PIL.Image
-
- has_pillow = True
-except ImportError:
- has_pillow = False
logger = logging.getLogger(__name__)
@@ -79,46 +70,56 @@ def _is_url(s: str) -> bool:
return False
-def _resize_image(img_data: bytes, max_dim: int = 1024) -> str:
- if not has_pillow:
- print( # noqa: T201
- "Pillow is required to resize images down to reasonable scale."
- " Please install it using `pip install pillow`."
- " For now, not resizing; may cause NVIDIA API to fail."
- )
- return base64.b64encode(img_data).decode("utf-8")
- image = PIL.Image.open(io.BytesIO(img_data))
- max_dim_size = max(image.size)
- aspect_ratio = max_dim / max_dim_size
- new_h = int(image.size[1] * aspect_ratio)
- new_w = int(image.size[0] * aspect_ratio)
- resized_image = image.resize((new_w, new_h), PIL.Image.Resampling.LANCZOS)
- output_buffer = io.BytesIO()
- resized_image.save(output_buffer, format="JPEG")
- output_buffer.seek(0)
- resized_b64_string = base64.b64encode(output_buffer.read()).decode("utf-8")
- return resized_b64_string
-
-
def _url_to_b64_string(image_source: str) -> str:
- b64_template = "data:image/png;base64,{b64_string}"
try:
if _is_url(image_source):
- response = requests.get(
- image_source, headers={"User-Agent": "langchain-nvidia-ai-endpoints"}
- )
- response.raise_for_status()
- encoded = base64.b64encode(response.content).decode("utf-8")
- if sys.getsizeof(encoded) > 200000:
- ## (VK) Temporary fix. NVIDIA API has a limit of 250KB for the input.
- encoded = _resize_image(response.content)
- return b64_template.format(b64_string=encoded)
+ return image_source
+ # import sys
+ # import io
+ # try:
+ # import PIL.Image
+ # has_pillow = True
+ # except ImportError:
+ # has_pillow = False
+ # def _resize_image(img_data: bytes, max_dim: int = 1024) -> str:
+ # if not has_pillow:
+ # print( # noqa: T201
+ # "Pillow is required to resize images down to reasonable scale." # noqa: E501
+ # " Please install it using `pip install pillow`."
+ # " For now, not resizing; may cause NVIDIA API to fail."
+ # )
+ # return base64.b64encode(img_data).decode("utf-8")
+ # image = PIL.Image.open(io.BytesIO(img_data))
+ # max_dim_size = max(image.size)
+ # aspect_ratio = max_dim / max_dim_size
+ # new_h = int(image.size[1] * aspect_ratio)
+ # new_w = int(image.size[0] * aspect_ratio)
+ # resized_image = image.resize((new_w, new_h), PIL.Image.Resampling.LANCZOS) # noqa: E501
+ # output_buffer = io.BytesIO()
+ # resized_image.save(output_buffer, format="JPEG")
+ # output_buffer.seek(0)
+ # resized_b64_string = base64.b64encode(output_buffer.read()).decode("utf-8") # noqa: E501
+ # return resized_b64_string
+ # b64_template = "data:image/png;base64,{b64_string}"
+ # response = requests.get(
+ # image_source, headers={"User-Agent": "langchain-nvidia-ai-endpoints"}
+ # )
+ # response.raise_for_status()
+ # encoded = base64.b64encode(response.content).decode("utf-8")
+ # if sys.getsizeof(encoded) > 200000:
+ # ## (VK) Temporary fix. NVIDIA API has a limit of 250KB for the input.
+ # encoded = _resize_image(response.content)
+ # return b64_template.format(b64_string=encoded)
elif image_source.startswith("data:image"):
return image_source
elif os.path.exists(image_source):
with open(image_source, "rb") as f:
- encoded = base64.b64encode(f.read()).decode("utf-8")
- return b64_template.format(b64_string=encoded)
+ image_data = f.read()
+ import imghdr
+
+ image_type = imghdr.what(None, image_data)
+ encoded = base64.b64encode(image_data).decode("utf-8")
+ return f"data:image/{image_type};base64,{encoded}"
else:
raise ValueError(
"The provided string is not a valid URL, base64, or file path."
@@ -127,7 +128,9 @@ def _url_to_b64_string(image_source: str) -> str:
raise ValueError(f"Unable to process the provided image source: {e}")
-def _nv_vlm_adjust_input(message_dict: Dict[str, Any]) -> Dict[str, Any]:
+def _nv_vlm_adjust_input(
+ message_dict: Dict[str, Any], model_type: str
+) -> Dict[str, Any]:
"""
The NVIDIA VLM API input message.content:
{
@@ -170,10 +173,73 @@ def _nv_vlm_adjust_input(message_dict: Dict[str, Any]) -> Dict[str, Any]:
isinstance(part["image_url"], dict)
and "url" in part["image_url"]
):
- part["image_url"] = _url_to_b64_string(part["image_url"]["url"])
+ url = _url_to_b64_string(part["image_url"]["url"])
+ if model_type == "nv-vlm":
+ part["image_url"] = url
+ else:
+ part["image_url"]["url"] = url
return message_dict
+def _nv_vlm_get_asset_ids(
+ content: Union[str, List[Union[str, Dict[str, Any]]]],
+) -> List[str]:
+ """
+ VLM APIs accept asset IDs as input in two forms:
+ - content = [{"image_url": {"url": "data:image/{type};asset_id,{asset_id}"}}*]
+ - content = .*<img src="data:image/{type};asset_id,{asset_id}"/>.*
+
+ This function extracts asset IDs from the message content.
+ """
+
+ def extract_asset_id(data: str) -> List[str]:
+ pattern = re.compile(r'data:image/[^;]+;asset_id,([^"\'\s]+)')
+ return pattern.findall(data)
+
+ asset_ids = []
+ if isinstance(content, str):
+ asset_ids.extend(extract_asset_id(content))
+ elif isinstance(content, list):
+ for part in content:
+ if isinstance(part, str):
+ asset_ids.extend(extract_asset_id(part))
+ elif isinstance(part, dict) and "image_url" in part:
+ image_url = part["image_url"]
+ if isinstance(image_url, dict) and "url" in image_url:
+ asset_ids.extend(extract_asset_id(image_url["url"]))
+
+ return asset_ids
+
+
+def _process_for_vlm(
+ inputs: List[Dict[str, Any]],
+ model: Optional[Model], # not optional, Optional for type alignment
+) -> Tuple[List[Dict[str, Any]], Dict[str, str]]:
+ """
+ Process inputs for NVIDIA VLM models.
+
+ This function processes the input messages for NVIDIA VLM models.
+ It extracts asset IDs from the input messages and adds them to the
+ headers for the NVIDIA VLM API.
+ """
+ if not model or not model.model_type:
+ return inputs, {}
+
+ extra_headers = {}
+ if "vlm" in model.model_type:
+ asset_ids = []
+ for input in inputs:
+ if "content" in input:
+ asset_ids.extend(_nv_vlm_get_asset_ids(input["content"]))
+ if asset_ids:
+ extra_headers["NVCF-INPUT-ASSET-REFERENCES"] = ",".join(asset_ids)
+ inputs = [_nv_vlm_adjust_input(message, model.model_type) for message in inputs]
+ return inputs, extra_headers
+
+
+_DEFAULT_MODEL_NAME: str = "meta/llama3-8b-instruct"
+
+
class ChatNVIDIA(BaseChatModel):
"""NVIDIA chat model.
@@ -188,31 +254,20 @@ class ChatNVIDIA(BaseChatModel):
"""
_client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)
- _default_model_name: str = "meta/llama3-8b-instruct"
- _default_base_url: str = "https://integrate.api.nvidia.com/v1"
- base_url: str = Field(
+ base_url: Optional[str] = Field(
+ default=None,
description="Base url for model listing an invocation",
)
- model: Optional[str] = Field(description="Name of the model to invoke")
- temperature: Optional[float] = Field(description="Sampling temperature in [0, 1]")
+ model: Optional[str] = Field(None, description="Name of the model to invoke")
+ temperature: Optional[float] = Field(
+ None, description="Sampling temperature in [0, 1]"
+ )
max_tokens: Optional[int] = Field(
1024, description="Maximum # of tokens to generate"
)
- top_p: Optional[float] = Field(description="Top-p for distribution sampling")
- seed: Optional[int] = Field(description="The seed for deterministic results")
- stop: Optional[Sequence[str]] = Field(description="Stop words (cased)")
-
- _base_url_var = "NVIDIA_BASE_URL"
-
- @root_validator(pre=True)
- def _validate_base_url(cls, values: Dict[str, Any]) -> Dict[str, Any]:
- values["base_url"] = (
- values.get(cls._base_url_var.lower())
- or values.get("base_url")
- or os.getenv(cls._base_url_var)
- or cls._default_base_url
- )
- return values
+ top_p: Optional[float] = Field(None, description="Top-p for distribution sampling")
+ seed: Optional[int] = Field(None, description="The seed for deterministic results")
+ stop: Optional[Sequence[str]] = Field(None, description="Stop words (cased)")
def __init__(self, **kwargs: Any):
"""
@@ -248,17 +303,23 @@ def __init__(self, **kwargs: Any):
)
"""
super().__init__(**kwargs)
+ # allow nvidia_base_url as an alternative for base_url
+ base_url = kwargs.pop("nvidia_base_url", self.base_url)
+ # allow nvidia_api_key as an alternative for api_key
+ api_key = kwargs.pop("nvidia_api_key", kwargs.pop("api_key", None))
self._client = _NVIDIAClient(
- base_url=self.base_url,
- model_name=self.model,
- default_hosted_model_name=self._default_model_name,
- api_key=kwargs.get("nvidia_api_key", kwargs.get("api_key", None)),
+ **({"base_url": base_url} if base_url else {}), # only pass if set
+ mdl_name=self.model,
+ default_hosted_model_name=_DEFAULT_MODEL_NAME,
+ **({"api_key": api_key} if api_key else {}), # only pass if set
infer_path="{base_url}/chat/completions",
cls=self.__class__.__name__,
)
# todo: only store the model in one place
# the model may be updated to a newer name during initialization
- self.model = self._client.model_name
+ self.model = self._client.mdl_name
+ # same for base_url
+ self.base_url = self._client.base_url
@property
def available_models(self) -> List[Model]:
@@ -282,6 +343,28 @@ def _llm_type(self) -> str:
"""Return type of NVIDIA AI Foundation Model Interface."""
return "chat-nvidia-ai-playground"
+ def _get_ls_params(
+ self,
+ stop: Optional[List[str]] = None,
+ **kwargs: Any,
+ ) -> LangSmithParams:
+ """Get standard LangSmith parameters for tracing."""
+ params = self._get_invocation_params(stop=stop, **kwargs)
+ return LangSmithParams(
+ ls_provider="NVIDIA",
+ # error: Incompatible types (expression has type "Optional[str]",
+ # TypedDict item "ls_model_name" has type "str") [typeddict-item]
+ ls_model_name=self.model or "UNKNOWN",
+ ls_model_type="chat",
+ ls_temperature=params.get("temperature", self.temperature),
+ ls_max_tokens=params.get("max_tokens", self.max_tokens),
+ # mypy error: Extra keys ("ls_top_p", "ls_seed")
+ # for TypedDict "LangSmithParams" [typeddict-item]
+ # ls_top_p=params.get("top_p", self.top_p),
+ # ls_seed=params.get("seed", self.seed),
+ ls_stop=params.get("stop", self.stop),
+ )
+
def _generate(
self,
messages: List[BaseMessage],
@@ -290,11 +373,12 @@ def _generate(
**kwargs: Any,
) -> ChatResult:
inputs = [
- _nv_vlm_adjust_input(message)
+ message
for message in [convert_message_to_dict(message) for message in messages]
]
+ inputs, extra_headers = _process_for_vlm(inputs, self._client.model)
payload = self._get_payload(inputs=inputs, stop=stop, stream=False, **kwargs)
- response = self._client.get_req(payload=payload)
+ response = self._client.get_req(payload=payload, extra_headers=extra_headers)
responses, _ = self._client.postprocess(response)
self._set_callback_out(responses, run_manager)
parsed_response = self._custom_postprocess(responses, streaming=False)
@@ -313,11 +397,28 @@ def _stream(
) -> Iterator[ChatGenerationChunk]:
"""Allows streaming to model!"""
inputs = [
- _nv_vlm_adjust_input(message)
+ message
for message in [convert_message_to_dict(message) for message in messages]
]
- payload = self._get_payload(inputs=inputs, stop=stop, stream=True, **kwargs)
- for response in self._client.get_req_stream(payload=payload):
+ inputs, extra_headers = _process_for_vlm(inputs, self._client.model)
+ payload = self._get_payload(
+ inputs=inputs,
+ stop=stop,
+ stream=True,
+ stream_options={"include_usage": True},
+ **kwargs,
+ )
+ # todo: get vlm endpoints fixed and remove this
+ # vlm endpoints do not accept standard stream_options parameter
+ if (
+ self._client.model
+ and self._client.model.model_type
+ and self._client.model.model_type == "nv-vlm"
+ ):
+ payload.pop("stream_options")
+ for response in self._client.get_req_stream(
+ payload=payload, extra_headers=extra_headers
+ ):
self._set_callback_out(response, run_manager)
parsed_response = self._custom_postprocess(response, streaming=True)
# for pre 0.2 compatibility w/ ChatMessageChunk
@@ -354,6 +455,12 @@ def _custom_postprocess(
"additional_kwargs": {},
"response_metadata": {},
}
+ if token_usage := kw_left.pop("token_usage", None):
+ out_dict["usage_metadata"] = {
+ "input_tokens": token_usage.get("prompt_tokens", 0),
+ "output_tokens": token_usage.get("completion_tokens", 0),
+ "total_tokens": token_usage.get("total_tokens", 0),
+ }
# "tool_calls" is set for invoke and stream responses
if tool_calls := kw_left.pop("tool_calls", None):
assert isinstance(
@@ -447,7 +554,7 @@ def _get_payload(
def bind_tools(
self,
- tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
+ tools: Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]],
*,
tool_choice: Optional[
Union[dict, str, Literal["auto", "none", "any", "required"], bool]
@@ -533,11 +640,11 @@ def bind_functions(
# as a result need to type ignore for the schema parameter and return type.
def with_structured_output( # type: ignore
self,
- schema: _DictOrPydanticOrEnumClass,
+ schema: Union[Dict, Type],
*,
include_raw: bool = False,
**kwargs: Any,
- ) -> Runnable[LanguageModelInput, _DictOrPydanticOrEnum]:
+ ) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
"""
Bind a structured output schema to the model.
@@ -574,7 +681,7 @@ def with_structured_output( # type: ignore
1. If a Pydantic schema is provided, the model will return a Pydantic object.
Example:
```
- from langchain_core.pydantic_v1 import BaseModel, Field
+ from pydantic import BaseModel, Field
class Joke(BaseModel):
setup: str = Field(description="The setup of the joke")
punchline: str = Field(description="The punchline to the joke")
@@ -732,7 +839,11 @@ def parse_result(
return None
output_parser = ForgivingPydanticOutputParser(pydantic_object=schema)
- nvext_param = {"guided_json": schema.schema()}
+ if hasattr(schema, "model_json_schema"):
+ json_schema = schema.model_json_schema()
+ else:
+ json_schema = schema.schema()
+ nvext_param = {"guided_json": json_schema}
else:
raise ValueError(
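
The asset-id extraction added above can be exercised in isolation; this sketch reuses the regex from _nv_vlm_get_asset_ids with invented sample input:

# Standalone sketch of the asset-id extraction; the sample message content
# is made up for illustration.
import re

pattern = re.compile(r'data:image/[^;]+;asset_id,([^"\'\s]+)')

content = [
    {"type": "text", "text": "Describe this image"},
    {"type": "image_url", "image_url": {"url": "data:image/jpg;asset_id,abc-123"}},
]

asset_ids = []
for part in content:
    if isinstance(part, dict) and "image_url" in part:
        asset_ids.extend(pattern.findall(part["image_url"].get("url", "")))

print(asset_ids)  # ['abc-123']
# _process_for_vlm joins these ids into the NVCF-INPUT-ASSET-REFERENCES header
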
diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
index 02f3715a..dc29d39c 100644
--- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
+++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
@@ -1,22 +1,24 @@
"""Embeddings Components Derived from NVEModel/Embeddings"""
-import os
import warnings
-from typing import Any, Dict, List, Literal, Optional
+from typing import Any, List, Literal, Optional
from langchain_core.embeddings import Embeddings
from langchain_core.outputs.llm_result import LLMResult
-from langchain_core.pydantic_v1 import (
+from pydantic import (
BaseModel,
+ ConfigDict,
Field,
PrivateAttr,
- root_validator,
)
from langchain_nvidia_ai_endpoints._common import _NVIDIAClient
from langchain_nvidia_ai_endpoints._statics import Model
from langchain_nvidia_ai_endpoints.callbacks import usage_callback_var
+_DEFAULT_MODEL_NAME: str = "nvidia/nv-embedqa-e5-v5"
+_DEFAULT_BATCH_SIZE: int = 50
+
class NVIDIAEmbeddings(BaseModel, Embeddings):
"""
@@ -29,17 +31,16 @@ class NVIDIAEmbeddings(BaseModel, Embeddings):
too long.
"""
- class Config:
- validate_assignment = True
+ model_config = ConfigDict(
+ validate_assignment=True,
+ )
_client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)
- _default_model_name: str = "nvidia/nv-embedqa-e5-v5"
- _default_max_batch_size: int = 50
- _default_base_url: str = "https://integrate.api.nvidia.com/v1"
- base_url: str = Field(
+ base_url: Optional[str] = Field(
+ default=None,
description="Base url for model listing an invocation",
)
- model: Optional[str] = Field(description="Name of the model to invoke")
+ model: Optional[str] = Field(None, description="Name of the model to invoke")
truncate: Literal["NONE", "START", "END"] = Field(
default="NONE",
description=(
@@ -47,19 +48,7 @@ class Config:
"Default is 'NONE', which raises an error if an input is too long."
),
)
- max_batch_size: int = Field(default=_default_max_batch_size)
-
- _base_url_var = "NVIDIA_BASE_URL"
-
- @root_validator(pre=True)
- def _validate_base_url(cls, values: Dict[str, Any]) -> Dict[str, Any]:
- values["base_url"] = (
- values.get(cls._base_url_var.lower())
- or values.get("base_url")
- or os.getenv(cls._base_url_var)
- or cls._default_base_url
- )
- return values
+ max_batch_size: int = Field(default=_DEFAULT_BATCH_SIZE)
def __init__(self, **kwargs: Any):
"""
@@ -90,17 +79,23 @@ def __init__(self, **kwargs: Any):
embedder = NVIDIAEmbeddings(base_url="http://localhost:8080/v1")
"""
super().__init__(**kwargs)
+ # allow nvidia_base_url as an alternative for base_url
+ base_url = kwargs.pop("nvidia_base_url", self.base_url)
+ # allow nvidia_api_key as an alternative for api_key
+ api_key = kwargs.pop("nvidia_api_key", kwargs.pop("api_key", None))
self._client = _NVIDIAClient(
- base_url=self.base_url,
- model_name=self.model,
- default_hosted_model_name=self._default_model_name,
- api_key=kwargs.get("nvidia_api_key", kwargs.get("api_key", None)),
+ **({"base_url": base_url} if base_url else {}), # only pass if set
+ mdl_name=self.model,
+ default_hosted_model_name=_DEFAULT_MODEL_NAME,
+ **({"api_key": api_key} if api_key else {}), # only pass if set
infer_path="{base_url}/embeddings",
cls=self.__class__.__name__,
)
# todo: only store the model in one place
# the model may be updated to a newer name during initialization
- self.model = self._client.model_name
+ self.model = self._client.mdl_name
+ # same for base_url
+ self.base_url = self._client.base_url
# todo: remove when nvolveqa_40k is removed from MODEL_TABLE
if "model" in kwargs and kwargs["model"] in [
diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py
index 12f364a5..942e610d 100644
--- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py
+++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py
@@ -1,50 +1,40 @@
from __future__ import annotations
-import os
import warnings
from typing import Any, Dict, Iterator, List, Optional
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
-from langchain_core.pydantic_v1 import Field, PrivateAttr, root_validator
+from pydantic import ConfigDict, Field, PrivateAttr
from langchain_nvidia_ai_endpoints._common import _NVIDIAClient
from langchain_nvidia_ai_endpoints._statics import Model
+_DEFAULT_MODEL_NAME: str = "nvidia/mistral-nemo-minitron-8b-base"
+
class NVIDIA(LLM):
"""
LangChain LLM that uses the Completions API with NVIDIA NIMs.
"""
- class Config:
- validate_assignment = True
+ model_config = ConfigDict(
+ validate_assignment=True,
+ )
_client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)
_default_model_name: str = "nvidia/mistral-nemo-minitron-8b-base"
- _default_base_url: str = "https://integrate.api.nvidia.com/v1"
- base_url: str = Field(
+ base_url: Optional[str] = Field(
+ default=None,
description="Base url for model listing and invocation",
)
- model: Optional[str] = Field(description="The model to use for completions.")
-
- _base_url_var = "NVIDIA_BASE_URL"
+ model: Optional[str] = Field(None, description="The model to use for completions.")
_init_args: Dict[str, Any] = PrivateAttr()
"""Stashed arguments given to the constructor that can be passed to
the Completions API endpoint."""
- @root_validator(pre=True)
- def _validate_base_url(cls, values: Dict[str, Any]) -> Dict[str, Any]:
- values["base_url"] = (
- values.get(cls._base_url_var.lower())
- or values.get("base_url")
- or os.getenv(cls._base_url_var)
- or cls._default_base_url
- )
- return values
-
def __check_kwargs(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
"""
Check kwargs, warn for unknown keys, and return a copy of recognized keys.
@@ -109,17 +99,23 @@ def __init__(self, **kwargs: Any):
e.g. `NVIDIA().invoke("prompt", max_tokens=512)`.
"""
super().__init__(**kwargs)
+ # allow nvidia_base_url as an alternative for base_url
+ base_url = kwargs.pop("nvidia_base_url", self.base_url)
+ # allow nvidia_api_key as an alternative for api_key
+ api_key = kwargs.pop("nvidia_api_key", kwargs.pop("api_key", None))
self._client = _NVIDIAClient(
- base_url=self.base_url,
- model_name=self.model,
- default_hosted_model_name=self._default_model_name,
- api_key=kwargs.pop("nvidia_api_key", kwargs.pop("api_key", None)),
+ **({"base_url": base_url} if base_url else {}), # only pass if set
+ mdl_name=self.model,
+ default_hosted_model_name=_DEFAULT_MODEL_NAME,
+ **({"api_key": api_key} if api_key else {}), # only pass if set
infer_path="{base_url}/completions",
cls=self.__class__.__name__,
)
# todo: only store the model in one place
# the model may be updated to a newer name during initialization
- self.model = self._client.model_name
+ self.model = self._client.mdl_name
+ # same for base_url
+ self.base_url = self._client.base_url
# stash all additional args that can be passed to the Completions API,
# but first make sure we pull out any args that are processed elsewhere.
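
Reviewer note: the `class Config` to `model_config = ConfigDict(...)` change above is the standard pydantic v1-to-v2 migration; the removed `_validate_base_url` root validator's `NVIDIA_BASE_URL` fallback presumably now lives inside `_NVIDIAClient`, which is why `base_url` is only forwarded when set. A minimal sketch of what `validate_assignment=True` buys, using a toy model rather than the NVIDIA classes:

```python
from pydantic import BaseModel, ConfigDict, ValidationError


class Widget(BaseModel):
    # v2 replacement for the v1 `class Config: validate_assignment = True`
    model_config = ConfigDict(validate_assignment=True)

    size: int = 1


w = Widget()
try:
    w.size = "not an int"  # type: ignore[assignment]  # re-validated on assignment
except ValidationError as e:
    print(e.error_count(), "assignment error")
```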
diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py
index d1caa69f..d64f8d84 100644
--- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py
+++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py
@@ -1,12 +1,16 @@
from __future__ import annotations
-import os
-from typing import Any, Dict, Generator, List, Literal, Optional, Sequence
+from typing import Any, Generator, List, Literal, Optional, Sequence
from langchain_core.callbacks.manager import Callbacks
from langchain_core.documents import Document
from langchain_core.documents.compressor import BaseDocumentCompressor
-from langchain_core.pydantic_v1 import BaseModel, Field, PrivateAttr, root_validator
+from pydantic import (
+ BaseModel,
+ ConfigDict,
+ Field,
+ PrivateAttr,
+)
from langchain_nvidia_ai_endpoints._common import _NVIDIAClient
from langchain_nvidia_ai_endpoints._statics import Model
@@ -17,25 +21,29 @@ class Ranking(BaseModel):
logit: float
+_DEFAULT_MODEL_NAME: str = "nvidia/nv-rerankqa-mistral-4b-v3"
+_DEFAULT_BATCH_SIZE: int = 32
+
+
class NVIDIARerank(BaseDocumentCompressor):
"""
LangChain Document Compressor that uses the NVIDIA NeMo Retriever Reranking API.
"""
- class Config:
- validate_assignment = True
+ model_config = ConfigDict(
+ validate_assignment=True,
+ )
_client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)
- _default_batch_size: int = 32
- _default_model_name: str = "nvidia/nv-rerankqa-mistral-4b-v3"
- _default_base_url: str = "https://integrate.api.nvidia.com/v1"
- base_url: str = Field(
+ base_url: Optional[str] = Field(
+ default=None,
        description="Base url for model listing and invocation",
)
top_n: int = Field(5, ge=0, description="The number of documents to return.")
- model: Optional[str] = Field(description="The model to use for reranking.")
+ model: Optional[str] = Field(None, description="The model to use for reranking.")
truncate: Optional[Literal["NONE", "END"]] = Field(
+ default=None,
description=(
"Truncate input text if it exceeds the model's maximum token length. "
"Default is model dependent and is likely to raise error if an "
@@ -43,21 +51,9 @@ class Config:
),
)
max_batch_size: int = Field(
- _default_batch_size, ge=1, description="The maximum batch size."
+ _DEFAULT_BATCH_SIZE, ge=1, description="The maximum batch size."
)
- _base_url_var = "NVIDIA_BASE_URL"
-
- @root_validator(pre=True)
- def _validate_base_url(cls, values: Dict[str, Any]) -> Dict[str, Any]:
- values["base_url"] = (
- values.get(cls._base_url_var.lower())
- or values.get("base_url")
- or os.getenv(cls._base_url_var)
- or cls._default_base_url
- )
- return values
-
def __init__(self, **kwargs: Any):
"""
Create a new NVIDIARerank document compressor.
@@ -134,17 +130,23 @@ def __init__(self, **kwargs: Any):
"""
super().__init__(**kwargs)
+ # allow nvidia_base_url as an alternative for base_url
+ base_url = kwargs.pop("nvidia_base_url", self.base_url)
+ # allow nvidia_api_key as an alternative for api_key
+ api_key = kwargs.pop("nvidia_api_key", kwargs.pop("api_key", None))
self._client = _NVIDIAClient(
- base_url=self.base_url,
- model_name=self.model,
- default_hosted_model_name=self._default_model_name,
- api_key=kwargs.get("nvidia_api_key", kwargs.get("api_key", None)),
+ **({"base_url": base_url} if base_url else {}), # only pass if set
+ mdl_name=self.model,
+ default_hosted_model_name=_DEFAULT_MODEL_NAME,
+ **({"api_key": api_key} if api_key else {}), # only pass if set
infer_path="{base_url}/ranking",
cls=self.__class__.__name__,
)
# todo: only store the model in one place
# the model may be updated to a newer name during initialization
- self.model = self._client.model_name
+ self.model = self._client.mdl_name
+ # same for base_url
+ self.base_url = self._client.base_url
@property
def available_models(self) -> List[Model]:
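
Reviewer note: a likely motivation for hoisting `_default_model_name` and `_default_batch_size` to module-level `_DEFAULT_*` constants (hedged, not stated in the diff) is that pydantic v2 turns underscore-prefixed annotated class attributes into private attributes, so they are no longer readable as plain class constants. A small illustration:

```python
from pydantic import BaseModel

_DEFAULT_NAME: str = "demo-model"  # module-level constant: a plain string


class Thing(BaseModel):
    _default_name: str = "demo-model"  # v2 turns this into a private attribute


# On the class, v2 exposes a ModelPrivateAttr wrapper rather than the string,
# which is why tests can no longer read e.g. NVIDIARerank._default_model_name.
print(type(Thing._default_name).__name__)  # ModelPrivateAttr
print(_DEFAULT_NAME)  # demo-model
```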
diff --git a/libs/ai-endpoints/poetry.lock b/libs/ai-endpoints/poetry.lock
index 483fb260..a01a1932 100644
--- a/libs/ai-endpoints/poetry.lock
+++ b/libs/ai-endpoints/poetry.lock
@@ -148,18 +148,15 @@ files = [
{file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
]
-[package.dependencies]
-typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""}
-
[[package]]
name = "anyio"
-version = "4.4.0"
+version = "4.5.0"
description = "High level compatibility layer for multiple asynchronous event loop implementations"
optional = false
python-versions = ">=3.8"
files = [
- {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"},
- {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"},
+ {file = "anyio-4.5.0-py3-none-any.whl", hash = "sha256:fdeb095b7cc5a5563175eedd926ec4ae55413bb4be5770c424af0ba46ccb4a78"},
+ {file = "anyio-4.5.0.tar.gz", hash = "sha256:c5a275fe5ca0afd788001f58fca1e69e29ce706d746e317d660e21f70c530ef9"},
]
[package.dependencies]
@@ -169,9 +166,9 @@ sniffio = ">=1.1"
typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
[package.extras]
-doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
-test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"]
-trio = ["trio (>=0.23)"]
+doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
+test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1)"]
+trio = ["trio (>=0.26.1)"]
[[package]]
name = "async-timeout"
@@ -368,7 +365,6 @@ files = [
[package.dependencies]
python-dateutil = ">=2.4"
-typing-extensions = {version = ">=3.10.0.1", markers = "python_version <= \"3.8\""}
[[package]]
name = "freezegun"
@@ -579,19 +575,19 @@ files = [
[[package]]
name = "langchain-core"
-version = "0.2.40"
+version = "0.3.1"
description = "Building applications with LLMs through composability"
optional = false
-python-versions = ">=3.8.1,<4.0"
+python-versions = ">=3.9,<4.0"
files = []
develop = false
[package.dependencies]
jsonpatch = "^1.33"
-langsmith = "^0.1.112"
+langsmith = "^0.1.117"
packaging = ">=23.2,<25"
pydantic = [
- {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""},
+ {version = ">=2.5.2,<3.0.0", markers = "python_full_version < \"3.12.4\""},
{version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
]
PyYAML = ">=5.3"
@@ -601,19 +597,41 @@ typing-extensions = ">=4.7"
[package.source]
type = "git"
url = "https://github.com/langchain-ai/langchain.git"
-reference = "langchain-core==0.2.40"
-resolved_reference = "0f2b32ffa96358192e011ee2f8db579a323ed0ce"
+reference = "HEAD"
+resolved_reference = "eef18dec442eabb2c2532bd67cc2efa12a43d406"
subdirectory = "libs/core"
+[[package]]
+name = "langchain-standard-tests"
+version = "0.1.1"
+description = "Standard tests for LangChain implementations"
+optional = false
+python-versions = ">=3.9,<4.0"
+files = []
+develop = false
+
+[package.dependencies]
+httpx = "^0.27.0"
+langchain-core = "^0.3.0"
+pytest = ">=7,<9"
+syrupy = "^4"
+
+[package.source]
+type = "git"
+url = "https://github.com/langchain-ai/langchain.git"
+reference = "HEAD"
+resolved_reference = "eef18dec442eabb2c2532bd67cc2efa12a43d406"
+subdirectory = "libs/standard-tests"
+
[[package]]
name = "langsmith"
-version = "0.1.121"
+version = "0.1.123"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
optional = false
python-versions = "<4.0,>=3.8.1"
files = [
- {file = "langsmith-0.1.121-py3-none-any.whl", hash = "sha256:fdb1ac8a671d3904201bfeea197d87bded46a10d08f1034af464211872e29893"},
- {file = "langsmith-0.1.121.tar.gz", hash = "sha256:e9381b82a5bd484af9a51c3e96faea572746b8d617b070c1cda40cbbe48e33df"},
+ {file = "langsmith-0.1.123-py3-none-any.whl", hash = "sha256:ee30c96e69038af92487c6229870b9ccc1fba43eb1b84fb4132a013af7212c6e"},
+ {file = "langsmith-0.1.123.tar.gz", hash = "sha256:5d4ad7bb57351f0fc492debf2d7d0b96f2eed41b5545cd36f3043c5f4d42aa6b"},
]
[package.dependencies]
@@ -981,18 +999,18 @@ testing = ["pytest", "pytest-benchmark"]
[[package]]
name = "pydantic"
-version = "2.9.1"
+version = "2.9.2"
description = "Data validation using Python type hints"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pydantic-2.9.1-py3-none-any.whl", hash = "sha256:7aff4db5fdf3cf573d4b3c30926a510a10e19a0774d38fc4967f78beb6deb612"},
- {file = "pydantic-2.9.1.tar.gz", hash = "sha256:1363c7d975c7036df0db2b4a61f2e062fbc0aa5ab5f2772e0ffc7191a4f4bce2"},
+ {file = "pydantic-2.9.2-py3-none-any.whl", hash = "sha256:f048cec7b26778210e28a0459867920654d48e5e62db0958433636cde4254f12"},
+ {file = "pydantic-2.9.2.tar.gz", hash = "sha256:d155cef71265d1e9807ed1c32b4c8deec042a44a50a4188b25ac67ecd81a9c0f"},
]
[package.dependencies]
annotated-types = ">=0.6.0"
-pydantic-core = "2.23.3"
+pydantic-core = "2.23.4"
typing-extensions = [
{version = ">=4.6.1", markers = "python_version < \"3.13\""},
{version = ">=4.12.2", markers = "python_version >= \"3.13\""},
@@ -1004,100 +1022,100 @@ timezone = ["tzdata"]
[[package]]
name = "pydantic-core"
-version = "2.23.3"
+version = "2.23.4"
description = "Core functionality for Pydantic validation and serialization"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pydantic_core-2.23.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:7f10a5d1b9281392f1bf507d16ac720e78285dfd635b05737c3911637601bae6"},
- {file = "pydantic_core-2.23.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3c09a7885dd33ee8c65266e5aa7fb7e2f23d49d8043f089989726391dd7350c5"},
- {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6470b5a1ec4d1c2e9afe928c6cb37eb33381cab99292a708b8cb9aa89e62429b"},
- {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9172d2088e27d9a185ea0a6c8cebe227a9139fd90295221d7d495944d2367700"},
- {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86fc6c762ca7ac8fbbdff80d61b2c59fb6b7d144aa46e2d54d9e1b7b0e780e01"},
- {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0cb80fd5c2df4898693aa841425ea1727b1b6d2167448253077d2a49003e0ed"},
- {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03667cec5daf43ac4995cefa8aaf58f99de036204a37b889c24a80927b629cec"},
- {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:047531242f8e9c2db733599f1c612925de095e93c9cc0e599e96cf536aaf56ba"},
- {file = "pydantic_core-2.23.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5499798317fff7f25dbef9347f4451b91ac2a4330c6669821c8202fd354c7bee"},
- {file = "pydantic_core-2.23.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bbb5e45eab7624440516ee3722a3044b83fff4c0372efe183fd6ba678ff681fe"},
- {file = "pydantic_core-2.23.3-cp310-none-win32.whl", hash = "sha256:8b5b3ed73abb147704a6e9f556d8c5cb078f8c095be4588e669d315e0d11893b"},
- {file = "pydantic_core-2.23.3-cp310-none-win_amd64.whl", hash = "sha256:2b603cde285322758a0279995b5796d64b63060bfbe214b50a3ca23b5cee3e83"},
- {file = "pydantic_core-2.23.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:c889fd87e1f1bbeb877c2ee56b63bb297de4636661cc9bbfcf4b34e5e925bc27"},
- {file = "pydantic_core-2.23.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea85bda3189fb27503af4c45273735bcde3dd31c1ab17d11f37b04877859ef45"},
- {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7f7f72f721223f33d3dc98a791666ebc6a91fa023ce63733709f4894a7dc611"},
- {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b2b55b0448e9da68f56b696f313949cda1039e8ec7b5d294285335b53104b61"},
- {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c24574c7e92e2c56379706b9a3f07c1e0c7f2f87a41b6ee86653100c4ce343e5"},
- {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2b05e6ccbee333a8f4b8f4d7c244fdb7a979e90977ad9c51ea31261e2085ce0"},
- {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2c409ce1c219c091e47cb03feb3c4ed8c2b8e004efc940da0166aaee8f9d6c8"},
- {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d965e8b325f443ed3196db890d85dfebbb09f7384486a77461347f4adb1fa7f8"},
- {file = "pydantic_core-2.23.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f56af3a420fb1ffaf43ece3ea09c2d27c444e7c40dcb7c6e7cf57aae764f2b48"},
- {file = "pydantic_core-2.23.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5b01a078dd4f9a52494370af21aa52964e0a96d4862ac64ff7cea06e0f12d2c5"},
- {file = "pydantic_core-2.23.3-cp311-none-win32.whl", hash = "sha256:560e32f0df04ac69b3dd818f71339983f6d1f70eb99d4d1f8e9705fb6c34a5c1"},
- {file = "pydantic_core-2.23.3-cp311-none-win_amd64.whl", hash = "sha256:c744fa100fdea0d000d8bcddee95213d2de2e95b9c12be083370b2072333a0fa"},
- {file = "pydantic_core-2.23.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e0ec50663feedf64d21bad0809f5857bac1ce91deded203efc4a84b31b2e4305"},
- {file = "pydantic_core-2.23.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:db6e6afcb95edbe6b357786684b71008499836e91f2a4a1e55b840955b341dbb"},
- {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98ccd69edcf49f0875d86942f4418a4e83eb3047f20eb897bffa62a5d419c8fa"},
- {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a678c1ac5c5ec5685af0133262103defb427114e62eafeda12f1357a12140162"},
- {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01491d8b4d8db9f3391d93b0df60701e644ff0894352947f31fff3e52bd5c801"},
- {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fcf31facf2796a2d3b7fe338fe8640aa0166e4e55b4cb108dbfd1058049bf4cb"},
- {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7200fd561fb3be06827340da066df4311d0b6b8eb0c2116a110be5245dceb326"},
- {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dc1636770a809dee2bd44dd74b89cc80eb41172bcad8af75dd0bc182c2666d4c"},
- {file = "pydantic_core-2.23.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:67a5def279309f2e23014b608c4150b0c2d323bd7bccd27ff07b001c12c2415c"},
- {file = "pydantic_core-2.23.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:748bdf985014c6dd3e1e4cc3db90f1c3ecc7246ff5a3cd4ddab20c768b2f1dab"},
- {file = "pydantic_core-2.23.3-cp312-none-win32.whl", hash = "sha256:255ec6dcb899c115f1e2a64bc9ebc24cc0e3ab097775755244f77360d1f3c06c"},
- {file = "pydantic_core-2.23.3-cp312-none-win_amd64.whl", hash = "sha256:40b8441be16c1e940abebed83cd006ddb9e3737a279e339dbd6d31578b802f7b"},
- {file = "pydantic_core-2.23.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6daaf5b1ba1369a22c8b050b643250e3e5efc6a78366d323294aee54953a4d5f"},
- {file = "pydantic_core-2.23.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d015e63b985a78a3d4ccffd3bdf22b7c20b3bbd4b8227809b3e8e75bc37f9cb2"},
- {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3fc572d9b5b5cfe13f8e8a6e26271d5d13f80173724b738557a8c7f3a8a3791"},
- {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f6bd91345b5163ee7448bee201ed7dd601ca24f43f439109b0212e296eb5b423"},
- {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc379c73fd66606628b866f661e8785088afe2adaba78e6bbe80796baf708a63"},
- {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbdce4b47592f9e296e19ac31667daed8753c8367ebb34b9a9bd89dacaa299c9"},
- {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc3cf31edf405a161a0adad83246568647c54404739b614b1ff43dad2b02e6d5"},
- {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8e22b477bf90db71c156f89a55bfe4d25177b81fce4aa09294d9e805eec13855"},
- {file = "pydantic_core-2.23.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:0a0137ddf462575d9bce863c4c95bac3493ba8e22f8c28ca94634b4a1d3e2bb4"},
- {file = "pydantic_core-2.23.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:203171e48946c3164fe7691fc349c79241ff8f28306abd4cad5f4f75ed80bc8d"},
- {file = "pydantic_core-2.23.3-cp313-none-win32.whl", hash = "sha256:76bdab0de4acb3f119c2a4bff740e0c7dc2e6de7692774620f7452ce11ca76c8"},
- {file = "pydantic_core-2.23.3-cp313-none-win_amd64.whl", hash = "sha256:37ba321ac2a46100c578a92e9a6aa33afe9ec99ffa084424291d84e456f490c1"},
- {file = "pydantic_core-2.23.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d063c6b9fed7d992bcbebfc9133f4c24b7a7f215d6b102f3e082b1117cddb72c"},
- {file = "pydantic_core-2.23.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6cb968da9a0746a0cf521b2b5ef25fc5a0bee9b9a1a8214e0a1cfaea5be7e8a4"},
- {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbefe079a520c5984e30e1f1f29325054b59534729c25b874a16a5048028d16"},
- {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cbaaf2ef20d282659093913da9d402108203f7cb5955020bd8d1ae5a2325d1c4"},
- {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fb539d7e5dc4aac345846f290cf504d2fd3c1be26ac4e8b5e4c2b688069ff4cf"},
- {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e6f33503c5495059148cc486867e1d24ca35df5fc064686e631e314d959ad5b"},
- {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04b07490bc2f6f2717b10c3969e1b830f5720b632f8ae2f3b8b1542394c47a8e"},
- {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:03795b9e8a5d7fda05f3873efc3f59105e2dcff14231680296b87b80bb327295"},
- {file = "pydantic_core-2.23.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c483dab0f14b8d3f0df0c6c18d70b21b086f74c87ab03c59250dbf6d3c89baba"},
- {file = "pydantic_core-2.23.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8b2682038e255e94baf2c473dca914a7460069171ff5cdd4080be18ab8a7fd6e"},
- {file = "pydantic_core-2.23.3-cp38-none-win32.whl", hash = "sha256:f4a57db8966b3a1d1a350012839c6a0099f0898c56512dfade8a1fe5fb278710"},
- {file = "pydantic_core-2.23.3-cp38-none-win_amd64.whl", hash = "sha256:13dd45ba2561603681a2676ca56006d6dee94493f03d5cadc055d2055615c3ea"},
- {file = "pydantic_core-2.23.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:82da2f4703894134a9f000e24965df73cc103e31e8c31906cc1ee89fde72cbd8"},
- {file = "pydantic_core-2.23.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dd9be0a42de08f4b58a3cc73a123f124f65c24698b95a54c1543065baca8cf0e"},
- {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89b731f25c80830c76fdb13705c68fef6a2b6dc494402987c7ea9584fe189f5d"},
- {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c6de1ec30c4bb94f3a69c9f5f2182baeda5b809f806676675e9ef6b8dc936f28"},
- {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb68b41c3fa64587412b104294b9cbb027509dc2f6958446c502638d481525ef"},
- {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c3980f2843de5184656aab58698011b42763ccba11c4a8c35936c8dd6c7068c"},
- {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94f85614f2cba13f62c3c6481716e4adeae48e1eaa7e8bac379b9d177d93947a"},
- {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:510b7fb0a86dc8f10a8bb43bd2f97beb63cffad1203071dc434dac26453955cd"},
- {file = "pydantic_core-2.23.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1eba2f7ce3e30ee2170410e2171867ea73dbd692433b81a93758ab2de6c64835"},
- {file = "pydantic_core-2.23.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4b259fd8409ab84b4041b7b3f24dcc41e4696f180b775961ca8142b5b21d0e70"},
- {file = "pydantic_core-2.23.3-cp39-none-win32.whl", hash = "sha256:40d9bd259538dba2f40963286009bf7caf18b5112b19d2b55b09c14dde6db6a7"},
- {file = "pydantic_core-2.23.3-cp39-none-win_amd64.whl", hash = "sha256:5a8cd3074a98ee70173a8633ad3c10e00dcb991ecec57263aacb4095c5efb958"},
- {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f399e8657c67313476a121a6944311fab377085ca7f490648c9af97fc732732d"},
- {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:6b5547d098c76e1694ba85f05b595720d7c60d342f24d5aad32c3049131fa5c4"},
- {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0dda0290a6f608504882d9f7650975b4651ff91c85673341789a476b1159f211"},
- {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65b6e5da855e9c55a0c67f4db8a492bf13d8d3316a59999cfbaf98cc6e401961"},
- {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:09e926397f392059ce0afdcac920df29d9c833256354d0c55f1584b0b70cf07e"},
- {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:87cfa0ed6b8c5bd6ae8b66de941cece179281239d482f363814d2b986b79cedc"},
- {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e61328920154b6a44d98cabcb709f10e8b74276bc709c9a513a8c37a18786cc4"},
- {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce3317d155628301d649fe5e16a99528d5680af4ec7aa70b90b8dacd2d725c9b"},
- {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e89513f014c6be0d17b00a9a7c81b1c426f4eb9224b15433f3d98c1a071f8433"},
- {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:4f62c1c953d7ee375df5eb2e44ad50ce2f5aff931723b398b8bc6f0ac159791a"},
- {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2718443bc671c7ac331de4eef9b673063b10af32a0bb385019ad61dcf2cc8f6c"},
- {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0d90e08b2727c5d01af1b5ef4121d2f0c99fbee692c762f4d9d0409c9da6541"},
- {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2b676583fc459c64146debea14ba3af54e540b61762dfc0613dc4e98c3f66eeb"},
- {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:50e4661f3337977740fdbfbae084ae5693e505ca2b3130a6d4eb0f2281dc43b8"},
- {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:68f4cf373f0de6abfe599a38307f4417c1c867ca381c03df27c873a9069cda25"},
- {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:59d52cf01854cb26c46958552a21acb10dd78a52aa34c86f284e66b209db8cab"},
- {file = "pydantic_core-2.23.3.tar.gz", hash = "sha256:3cb0f65d8b4121c1b015c60104a685feb929a29d7cf204387c7f2688c7974690"},
+ {file = "pydantic_core-2.23.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b10bd51f823d891193d4717448fab065733958bdb6a6b351967bd349d48d5c9b"},
+ {file = "pydantic_core-2.23.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4fc714bdbfb534f94034efaa6eadd74e5b93c8fa6315565a222f7b6f42ca1166"},
+ {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63e46b3169866bd62849936de036f901a9356e36376079b05efa83caeaa02ceb"},
+ {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed1a53de42fbe34853ba90513cea21673481cd81ed1be739f7f2efb931b24916"},
+ {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cfdd16ab5e59fc31b5e906d1a3f666571abc367598e3e02c83403acabc092e07"},
+ {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255a8ef062cbf6674450e668482456abac99a5583bbafb73f9ad469540a3a232"},
+ {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a7cd62e831afe623fbb7aabbb4fe583212115b3ef38a9f6b71869ba644624a2"},
+ {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f09e2ff1f17c2b51f2bc76d1cc33da96298f0a036a137f5440ab3ec5360b624f"},
+ {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e38e63e6f3d1cec5a27e0afe90a085af8b6806ee208b33030e65b6516353f1a3"},
+ {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0dbd8dbed2085ed23b5c04afa29d8fd2771674223135dc9bc937f3c09284d071"},
+ {file = "pydantic_core-2.23.4-cp310-none-win32.whl", hash = "sha256:6531b7ca5f951d663c339002e91aaebda765ec7d61b7d1e3991051906ddde119"},
+ {file = "pydantic_core-2.23.4-cp310-none-win_amd64.whl", hash = "sha256:7c9129eb40958b3d4500fa2467e6a83356b3b61bfff1b414c7361d9220f9ae8f"},
+ {file = "pydantic_core-2.23.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:77733e3892bb0a7fa797826361ce8a9184d25c8dffaec60b7ffe928153680ba8"},
+ {file = "pydantic_core-2.23.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b84d168f6c48fabd1f2027a3d1bdfe62f92cade1fb273a5d68e621da0e44e6d"},
+ {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df49e7a0861a8c36d089c1ed57d308623d60416dab2647a4a17fe050ba85de0e"},
+ {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff02b6d461a6de369f07ec15e465a88895f3223eb75073ffea56b84d9331f607"},
+ {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:996a38a83508c54c78a5f41456b0103c30508fed9abcad0a59b876d7398f25fd"},
+ {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d97683ddee4723ae8c95d1eddac7c192e8c552da0c73a925a89fa8649bf13eea"},
+ {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:216f9b2d7713eb98cb83c80b9c794de1f6b7e3145eef40400c62e86cee5f4e1e"},
+ {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f783e0ec4803c787bcea93e13e9932edab72068f68ecffdf86a99fd5918878b"},
+ {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d0776dea117cf5272382634bd2a5c1b6eb16767c223c6a5317cd3e2a757c61a0"},
+ {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d5f7a395a8cf1621939692dba2a6b6a830efa6b3cee787d82c7de1ad2930de64"},
+ {file = "pydantic_core-2.23.4-cp311-none-win32.whl", hash = "sha256:74b9127ffea03643e998e0c5ad9bd3811d3dac8c676e47db17b0ee7c3c3bf35f"},
+ {file = "pydantic_core-2.23.4-cp311-none-win_amd64.whl", hash = "sha256:98d134c954828488b153d88ba1f34e14259284f256180ce659e8d83e9c05eaa3"},
+ {file = "pydantic_core-2.23.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f3e0da4ebaef65158d4dfd7d3678aad692f7666877df0002b8a522cdf088f231"},
+ {file = "pydantic_core-2.23.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f69a8e0b033b747bb3e36a44e7732f0c99f7edd5cea723d45bc0d6e95377ffee"},
+ {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723314c1d51722ab28bfcd5240d858512ffd3116449c557a1336cbe3919beb87"},
+ {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb2802e667b7051a1bebbfe93684841cc9351004e2badbd6411bf357ab8d5ac8"},
+ {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d18ca8148bebe1b0a382a27a8ee60350091a6ddaf475fa05ef50dc35b5df6327"},
+ {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33e3d65a85a2a4a0dc3b092b938a4062b1a05f3a9abde65ea93b233bca0e03f2"},
+ {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:128585782e5bfa515c590ccee4b727fb76925dd04a98864182b22e89a4e6ed36"},
+ {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:68665f4c17edcceecc112dfed5dbe6f92261fb9d6054b47d01bf6371a6196126"},
+ {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20152074317d9bed6b7a95ade3b7d6054845d70584216160860425f4fbd5ee9e"},
+ {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9261d3ce84fa1d38ed649c3638feefeae23d32ba9182963e465d58d62203bd24"},
+ {file = "pydantic_core-2.23.4-cp312-none-win32.whl", hash = "sha256:4ba762ed58e8d68657fc1281e9bb72e1c3e79cc5d464be146e260c541ec12d84"},
+ {file = "pydantic_core-2.23.4-cp312-none-win_amd64.whl", hash = "sha256:97df63000f4fea395b2824da80e169731088656d1818a11b95f3b173747b6cd9"},
+ {file = "pydantic_core-2.23.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7530e201d10d7d14abce4fb54cfe5b94a0aefc87da539d0346a484ead376c3cc"},
+ {file = "pydantic_core-2.23.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df933278128ea1cd77772673c73954e53a1c95a4fdf41eef97c2b779271bd0bd"},
+ {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cb3da3fd1b6a5d0279a01877713dbda118a2a4fc6f0d821a57da2e464793f05"},
+ {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c6dcb030aefb668a2b7009c85b27f90e51e6a3b4d5c9bc4c57631292015b0d"},
+ {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:696dd8d674d6ce621ab9d45b205df149399e4bb9aa34102c970b721554828510"},
+ {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2971bb5ffe72cc0f555c13e19b23c85b654dd2a8f7ab493c262071377bfce9f6"},
+ {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8394d940e5d400d04cad4f75c0598665cbb81aecefaca82ca85bd28264af7f9b"},
+ {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dff76e0602ca7d4cdaacc1ac4c005e0ce0dcfe095d5b5259163a80d3a10d327"},
+ {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7d32706badfe136888bdea71c0def994644e09fff0bfe47441deaed8e96fdbc6"},
+ {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed541d70698978a20eb63d8c5d72f2cc6d7079d9d90f6b50bad07826f1320f5f"},
+ {file = "pydantic_core-2.23.4-cp313-none-win32.whl", hash = "sha256:3d5639516376dce1940ea36edf408c554475369f5da2abd45d44621cb616f769"},
+ {file = "pydantic_core-2.23.4-cp313-none-win_amd64.whl", hash = "sha256:5a1504ad17ba4210df3a045132a7baeeba5a200e930f57512ee02909fc5c4cb5"},
+ {file = "pydantic_core-2.23.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d4488a93b071c04dc20f5cecc3631fc78b9789dd72483ba15d423b5b3689b555"},
+ {file = "pydantic_core-2.23.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:81965a16b675b35e1d09dd14df53f190f9129c0202356ed44ab2728b1c905658"},
+ {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ffa2ebd4c8530079140dd2d7f794a9d9a73cbb8e9d59ffe24c63436efa8f271"},
+ {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:61817945f2fe7d166e75fbfb28004034b48e44878177fc54d81688e7b85a3665"},
+ {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29d2c342c4bc01b88402d60189f3df065fb0dda3654744d5a165a5288a657368"},
+ {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5e11661ce0fd30a6790e8bcdf263b9ec5988e95e63cf901972107efc49218b13"},
+ {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d18368b137c6295db49ce7218b1a9ba15c5bc254c96d7c9f9e924a9bc7825ad"},
+ {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec4e55f79b1c4ffb2eecd8a0cfba9955a2588497d96851f4c8f99aa4a1d39b12"},
+ {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:374a5e5049eda9e0a44c696c7ade3ff355f06b1fe0bb945ea3cac2bc336478a2"},
+ {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5c364564d17da23db1106787675fc7af45f2f7b58b4173bfdd105564e132e6fb"},
+ {file = "pydantic_core-2.23.4-cp38-none-win32.whl", hash = "sha256:d7a80d21d613eec45e3d41eb22f8f94ddc758a6c4720842dc74c0581f54993d6"},
+ {file = "pydantic_core-2.23.4-cp38-none-win_amd64.whl", hash = "sha256:5f5ff8d839f4566a474a969508fe1c5e59c31c80d9e140566f9a37bba7b8d556"},
+ {file = "pydantic_core-2.23.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a4fa4fc04dff799089689f4fd502ce7d59de529fc2f40a2c8836886c03e0175a"},
+ {file = "pydantic_core-2.23.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7df63886be5e270da67e0966cf4afbae86069501d35c8c1b3b6c168f42cb36"},
+ {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcedcd19a557e182628afa1d553c3895a9f825b936415d0dbd3cd0bbcfd29b4b"},
+ {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f54b118ce5de9ac21c363d9b3caa6c800341e8c47a508787e5868c6b79c9323"},
+ {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86d2f57d3e1379a9525c5ab067b27dbb8a0642fb5d454e17a9ac434f9ce523e3"},
+ {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de6d1d1b9e5101508cb37ab0d972357cac5235f5c6533d1071964c47139257df"},
+ {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1278e0d324f6908e872730c9102b0112477a7f7cf88b308e4fc36ce1bdb6d58c"},
+ {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a6b5099eeec78827553827f4c6b8615978bb4b6a88e5d9b93eddf8bb6790f55"},
+ {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e55541f756f9b3ee346b840103f32779c695a19826a4c442b7954550a0972040"},
+ {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a5c7ba8ffb6d6f8f2ab08743be203654bb1aaa8c9dcb09f82ddd34eadb695605"},
+ {file = "pydantic_core-2.23.4-cp39-none-win32.whl", hash = "sha256:37b0fe330e4a58d3c58b24d91d1eb102aeec675a3db4c292ec3928ecd892a9a6"},
+ {file = "pydantic_core-2.23.4-cp39-none-win_amd64.whl", hash = "sha256:1498bec4c05c9c787bde9125cfdcc63a41004ff167f495063191b863399b1a29"},
+ {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f455ee30a9d61d3e1a15abd5068827773d6e4dc513e795f380cdd59932c782d5"},
+ {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1e90d2e3bd2c3863d48525d297cd143fe541be8bbf6f579504b9712cb6b643ec"},
+ {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e203fdf807ac7e12ab59ca2bfcabb38c7cf0b33c41efeb00f8e5da1d86af480"},
+ {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e08277a400de01bc72436a0ccd02bdf596631411f592ad985dcee21445bd0068"},
+ {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f220b0eea5965dec25480b6333c788fb72ce5f9129e8759ef876a1d805d00801"},
+ {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d06b0c8da4f16d1d1e352134427cb194a0a6e19ad5db9161bf32b2113409e728"},
+ {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ba1a0996f6c2773bd83e63f18914c1de3c9dd26d55f4ac302a7efe93fb8e7433"},
+ {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9a5bce9d23aac8f0cf0836ecfc033896aa8443b501c58d0602dbfd5bd5b37753"},
+ {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:78ddaaa81421a29574a682b3179d4cf9e6d405a09b99d93ddcf7e5239c742e21"},
+ {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:883a91b5dd7d26492ff2f04f40fbb652de40fcc0afe07e8129e8ae779c2110eb"},
+ {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88ad334a15b32a791ea935af224b9de1bf99bcd62fabf745d5f3442199d86d59"},
+ {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:233710f069d251feb12a56da21e14cca67994eab08362207785cf8c598e74577"},
+ {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19442362866a753485ba5e4be408964644dd6a09123d9416c54cd49171f50744"},
+ {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:624e278a7d29b6445e4e813af92af37820fafb6dcc55c012c834f9e26f9aaaef"},
+ {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f5ef8f42bec47f21d07668a043f077d507e5bf4e668d5c6dfe6aaba89de1a5b8"},
+ {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:aea443fffa9fbe3af1a9ba721a87f926fe548d32cab71d188a6ede77d0ff244e"},
+ {file = "pydantic_core-2.23.4.tar.gz", hash = "sha256:2584f7cf844ac4d970fba483a717dbe10c1c1c96a969bf65d61ffe94df1b2863"},
]
[package.dependencies]
@@ -1432,46 +1450,41 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "watchdog"
-version = "4.0.2"
+version = "5.0.2"
description = "Filesystem events monitoring"
optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
files = [
- {file = "watchdog-4.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ede7f010f2239b97cc79e6cb3c249e72962404ae3865860855d5cbe708b0fd22"},
- {file = "watchdog-4.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a2cffa171445b0efa0726c561eca9a27d00a1f2b83846dbd5a4f639c4f8ca8e1"},
- {file = "watchdog-4.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c50f148b31b03fbadd6d0b5980e38b558046b127dc483e5e4505fcef250f9503"},
- {file = "watchdog-4.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7c7d4bf585ad501c5f6c980e7be9c4f15604c7cc150e942d82083b31a7548930"},
- {file = "watchdog-4.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:914285126ad0b6eb2258bbbcb7b288d9dfd655ae88fa28945be05a7b475a800b"},
- {file = "watchdog-4.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:984306dc4720da5498b16fc037b36ac443816125a3705dfde4fd90652d8028ef"},
- {file = "watchdog-4.0.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1cdcfd8142f604630deef34722d695fb455d04ab7cfe9963055df1fc69e6727a"},
- {file = "watchdog-4.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d7ab624ff2f663f98cd03c8b7eedc09375a911794dfea6bf2a359fcc266bff29"},
- {file = "watchdog-4.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:132937547a716027bd5714383dfc40dc66c26769f1ce8a72a859d6a48f371f3a"},
- {file = "watchdog-4.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:cd67c7df93eb58f360c43802acc945fa8da70c675b6fa37a241e17ca698ca49b"},
- {file = "watchdog-4.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bcfd02377be80ef3b6bc4ce481ef3959640458d6feaae0bd43dd90a43da90a7d"},
- {file = "watchdog-4.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:980b71510f59c884d684b3663d46e7a14b457c9611c481e5cef08f4dd022eed7"},
- {file = "watchdog-4.0.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:aa160781cafff2719b663c8a506156e9289d111d80f3387cf3af49cedee1f040"},
- {file = "watchdog-4.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f6ee8dedd255087bc7fe82adf046f0b75479b989185fb0bdf9a98b612170eac7"},
- {file = "watchdog-4.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0b4359067d30d5b864e09c8597b112fe0a0a59321a0f331498b013fb097406b4"},
- {file = "watchdog-4.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:770eef5372f146997638d737c9a3c597a3b41037cfbc5c41538fc27c09c3a3f9"},
- {file = "watchdog-4.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eeea812f38536a0aa859972d50c76e37f4456474b02bd93674d1947cf1e39578"},
- {file = "watchdog-4.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b2c45f6e1e57ebb4687690c05bc3a2c1fb6ab260550c4290b8abb1335e0fd08b"},
- {file = "watchdog-4.0.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:10b6683df70d340ac3279eff0b2766813f00f35a1d37515d2c99959ada8f05fa"},
- {file = "watchdog-4.0.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:f7c739888c20f99824f7aa9d31ac8a97353e22d0c0e54703a547a218f6637eb3"},
- {file = "watchdog-4.0.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c100d09ac72a8a08ddbf0629ddfa0b8ee41740f9051429baa8e31bb903ad7508"},
- {file = "watchdog-4.0.2-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:f5315a8c8dd6dd9425b974515081fc0aadca1d1d61e078d2246509fd756141ee"},
- {file = "watchdog-4.0.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:2d468028a77b42cc685ed694a7a550a8d1771bb05193ba7b24006b8241a571a1"},
- {file = "watchdog-4.0.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:f15edcae3830ff20e55d1f4e743e92970c847bcddc8b7509bcd172aa04de506e"},
- {file = "watchdog-4.0.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:936acba76d636f70db8f3c66e76aa6cb5136a936fc2a5088b9ce1c7a3508fc83"},
- {file = "watchdog-4.0.2-py3-none-manylinux2014_armv7l.whl", hash = "sha256:e252f8ca942a870f38cf785aef420285431311652d871409a64e2a0a52a2174c"},
- {file = "watchdog-4.0.2-py3-none-manylinux2014_i686.whl", hash = "sha256:0e83619a2d5d436a7e58a1aea957a3c1ccbf9782c43c0b4fed80580e5e4acd1a"},
- {file = "watchdog-4.0.2-py3-none-manylinux2014_ppc64.whl", hash = "sha256:88456d65f207b39f1981bf772e473799fcdc10801062c36fd5ad9f9d1d463a73"},
- {file = "watchdog-4.0.2-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:32be97f3b75693a93c683787a87a0dc8db98bb84701539954eef991fb35f5fbc"},
- {file = "watchdog-4.0.2-py3-none-manylinux2014_s390x.whl", hash = "sha256:c82253cfc9be68e3e49282831afad2c1f6593af80c0daf1287f6a92657986757"},
- {file = "watchdog-4.0.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:c0b14488bd336c5b1845cee83d3e631a1f8b4e9c5091ec539406e4a324f882d8"},
- {file = "watchdog-4.0.2-py3-none-win32.whl", hash = "sha256:0d8a7e523ef03757a5aa29f591437d64d0d894635f8a50f370fe37f913ce4e19"},
- {file = "watchdog-4.0.2-py3-none-win_amd64.whl", hash = "sha256:c344453ef3bf875a535b0488e3ad28e341adbd5a9ffb0f7d62cefacc8824ef2b"},
- {file = "watchdog-4.0.2-py3-none-win_ia64.whl", hash = "sha256:baececaa8edff42cd16558a639a9b0ddf425f93d892e8392a56bf904f5eff22c"},
- {file = "watchdog-4.0.2.tar.gz", hash = "sha256:b4dfbb6c49221be4535623ea4474a4d6ee0a9cef4a80b20c28db4d858b64e270"},
+ {file = "watchdog-5.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d961f4123bb3c447d9fcdcb67e1530c366f10ab3a0c7d1c0c9943050936d4877"},
+ {file = "watchdog-5.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72990192cb63872c47d5e5fefe230a401b87fd59d257ee577d61c9e5564c62e5"},
+ {file = "watchdog-5.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6bec703ad90b35a848e05e1b40bf0050da7ca28ead7ac4be724ae5ac2653a1a0"},
+ {file = "watchdog-5.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:dae7a1879918f6544201d33666909b040a46421054a50e0f773e0d870ed7438d"},
+ {file = "watchdog-5.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c4a440f725f3b99133de610bfec93d570b13826f89616377715b9cd60424db6e"},
+ {file = "watchdog-5.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8b2918c19e0d48f5f20df458c84692e2a054f02d9df25e6c3c930063eca64c1"},
+ {file = "watchdog-5.0.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:aa9cd6e24126d4afb3752a3e70fce39f92d0e1a58a236ddf6ee823ff7dba28ee"},
+ {file = "watchdog-5.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f627c5bf5759fdd90195b0c0431f99cff4867d212a67b384442c51136a098ed7"},
+ {file = "watchdog-5.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d7594a6d32cda2b49df3fd9abf9b37c8d2f3eab5df45c24056b4a671ac661619"},
+ {file = "watchdog-5.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba32efcccfe2c58f4d01115440d1672b4eb26cdd6fc5b5818f1fb41f7c3e1889"},
+ {file = "watchdog-5.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:963f7c4c91e3f51c998eeff1b3fb24a52a8a34da4f956e470f4b068bb47b78ee"},
+ {file = "watchdog-5.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8c47150aa12f775e22efff1eee9f0f6beee542a7aa1a985c271b1997d340184f"},
+ {file = "watchdog-5.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:14dd4ed023d79d1f670aa659f449bcd2733c33a35c8ffd88689d9d243885198b"},
+ {file = "watchdog-5.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b84bff0391ad4abe25c2740c7aec0e3de316fdf7764007f41e248422a7760a7f"},
+ {file = "watchdog-5.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e8d5ff39f0a9968952cce548e8e08f849141a4fcc1290b1c17c032ba697b9d7"},
+ {file = "watchdog-5.0.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fb223456db6e5f7bd9bbd5cd969f05aae82ae21acc00643b60d81c770abd402b"},
+ {file = "watchdog-5.0.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9814adb768c23727a27792c77812cf4e2fd9853cd280eafa2bcfa62a99e8bd6e"},
+ {file = "watchdog-5.0.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:901ee48c23f70193d1a7bc2d9ee297df66081dd5f46f0ca011be4f70dec80dab"},
+ {file = "watchdog-5.0.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:638bcca3d5b1885c6ec47be67bf712b00a9ab3d4b22ec0881f4889ad870bc7e8"},
+ {file = "watchdog-5.0.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5597c051587f8757798216f2485e85eac583c3b343e9aa09127a3a6f82c65ee8"},
+ {file = "watchdog-5.0.2-py3-none-manylinux2014_armv7l.whl", hash = "sha256:53ed1bf71fcb8475dd0ef4912ab139c294c87b903724b6f4a8bd98e026862e6d"},
+ {file = "watchdog-5.0.2-py3-none-manylinux2014_i686.whl", hash = "sha256:29e4a2607bd407d9552c502d38b45a05ec26a8e40cc7e94db9bb48f861fa5abc"},
+ {file = "watchdog-5.0.2-py3-none-manylinux2014_ppc64.whl", hash = "sha256:b6dc8f1d770a8280997e4beae7b9a75a33b268c59e033e72c8a10990097e5fde"},
+ {file = "watchdog-5.0.2-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:d2ab34adc9bf1489452965cdb16a924e97d4452fcf88a50b21859068b50b5c3b"},
+ {file = "watchdog-5.0.2-py3-none-manylinux2014_s390x.whl", hash = "sha256:7d1aa7e4bb0f0c65a1a91ba37c10e19dabf7eaaa282c5787e51371f090748f4b"},
+ {file = "watchdog-5.0.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:726eef8f8c634ac6584f86c9c53353a010d9f311f6c15a034f3800a7a891d941"},
+ {file = "watchdog-5.0.2-py3-none-win32.whl", hash = "sha256:bda40c57115684d0216556671875e008279dea2dc00fcd3dde126ac8e0d7a2fb"},
+ {file = "watchdog-5.0.2-py3-none-win_amd64.whl", hash = "sha256:d010be060c996db725fbce7e3ef14687cdcc76f4ca0e4339a68cc4532c382a73"},
+ {file = "watchdog-5.0.2-py3-none-win_ia64.whl", hash = "sha256:3960136b2b619510569b90f0cd96408591d6c251a75c97690f4553ca88889769"},
+ {file = "watchdog-5.0.2.tar.gz", hash = "sha256:dcebf7e475001d2cdeb020be630dc5b687e9acdd60d16fea6bb4508e7b94cf76"},
]
[package.extras]
@@ -1584,5 +1597,5 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
-python-versions = ">=3.8.1,<4.0"
-content-hash = "b6b18aa680d8841dd68ccbdf6b821fda271ac0e9160fbccf22c4cebc69fbb668"
+python-versions = ">=3.9,<4.0"
+content-hash = "7a08e2786a8b70e328e92fbf014c8910fb64c0c55476c0dd050d61ffa636a4e5"
diff --git a/libs/ai-endpoints/pyproject.toml b/libs/ai-endpoints/pyproject.toml
index 23f72644..94e0befe 100644
--- a/libs/ai-endpoints/pyproject.toml
+++ b/libs/ai-endpoints/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-nvidia-ai-endpoints"
-version = "0.2.2"
+version = "0.3.0"
description = "An integration package connecting NVIDIA AI Endpoints and LangChain"
authors = []
readme = "README.md"
@@ -11,8 +11,8 @@ license = "MIT"
"Source Code" = "https://github.com/langchain-ai/langchain-nvidia/tree/main/libs/ai-endpoints"
[tool.poetry.dependencies]
-python = ">=3.8.1,<4.0"
-langchain-core = ">=0.2.22,<0.3"
+python = ">=3.9,<4.0"
+langchain-core = ">=0.3.0,<0.4"
aiohttp = "^3.9.1"
pillow = ">=10.0.0,<11.0.0"
@@ -26,8 +26,9 @@ pytest-mock = "^3.10.0"
syrupy = "^4.0.2"
pytest-watcher = "^0.3.4"
pytest-asyncio = "^0.21.1"
-langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core", tag = "langchain-core==0.2.40" }
+langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" }
requests-mock = "^1.11.0"
+langchain-standard-tests = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/standard-tests" }
faker = "^24.4.0"
[tool.poetry.group.codespell]
@@ -52,13 +53,13 @@ ruff = "^0.1.5"
mypy = "^0.991"
types-requests = "^2.31.0.10"
types-pillow = "^10.2.0.20240125"
-langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core", tag = "langchain-core==0.2.40" }
+langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" }
[tool.poetry.group.dev]
optional = true
[tool.poetry.group.dev.dependencies]
-langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core", tag = "langchain-core==0.2.40" }
+langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" }
[tool.ruff.lint]
select = [
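
Reviewer note: with the 0.3.0 bump pairing langchain-core 0.3.x and pydantic v2, a quick post-upgrade sanity probe (a hedged sketch, not part of the test suite) might be:

```python
# Both imports must resolve against the new major versions after `poetry install`.
import langchain_core
import pydantic

assert pydantic.VERSION.startswith("2.")
assert langchain_core.__version__.startswith("0.3")
```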
diff --git a/libs/ai-endpoints/scripts/check_pydantic.sh b/libs/ai-endpoints/scripts/check_pydantic.sh
deleted file mode 100755
index d0fa31d6..00000000
--- a/libs/ai-endpoints/scripts/check_pydantic.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-#
-# This script searches for lines starting with "import pydantic" or "from pydantic"
-# in tracked files within a Git repository.
-#
-# Usage: ./scripts/check_pydantic.sh /path/to/repository
-
-# Check if a path argument is provided
-if [ $# -ne 1 ]; then
- echo "Usage: $0 /path/to/repository"
- exit 1
-fi
-
-repository_path="$1"
-
-# Search for lines matching the pattern within the specified repository
-result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic' | grep -v "# ignore: check_pydantic")
-
-# Check if any matching lines were found
-if [ -n "$result" ]; then
- echo "ERROR: The following lines need to be updated:"
- echo "$result"
- echo "Please replace the code with an import from langchain_core.pydantic_v1."
- echo "For example, replace 'from pydantic import BaseModel'"
- echo "with 'from langchain_core.pydantic_v1 import BaseModel'"
- exit 1
-fi
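
Reviewer note: retiring `check_pydantic.sh` follows from the migration. The script existed to force imports through the `langchain_core.pydantic_v1` shim; with langchain-core 0.3 on pydantic v2 throughout, the direct import it used to reject is now the norm, as the `llm.py` changes above show:

```python
from pydantic import BaseModel, Field  # direct import, no langchain_core.pydantic_v1


class Example(BaseModel):
    name: str = Field(description="plain pydantic v2 field")
```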
diff --git a/libs/ai-endpoints/tests/data/nvidia-picasso-large.png b/libs/ai-endpoints/tests/data/nvidia-picasso-large.png
new file mode 100644
index 00000000..5ccac1a1
Binary files /dev/null and b/libs/ai-endpoints/tests/data/nvidia-picasso-large.png differ
diff --git a/libs/ai-endpoints/tests/data/nvidia-picasso.gif b/libs/ai-endpoints/tests/data/nvidia-picasso.gif
new file mode 100644
index 00000000..bed37022
Binary files /dev/null and b/libs/ai-endpoints/tests/data/nvidia-picasso.gif differ
diff --git a/libs/ai-endpoints/tests/data/nvidia-picasso.png b/libs/ai-endpoints/tests/data/nvidia-picasso.png
new file mode 100644
index 00000000..45beb713
Binary files /dev/null and b/libs/ai-endpoints/tests/data/nvidia-picasso.png differ
diff --git a/libs/ai-endpoints/tests/data/nvidia-picasso.webp b/libs/ai-endpoints/tests/data/nvidia-picasso.webp
new file mode 100644
index 00000000..db596dc1
Binary files /dev/null and b/libs/ai-endpoints/tests/data/nvidia-picasso.webp differ
diff --git a/libs/ai-endpoints/tests/integration_tests/conftest.py b/libs/ai-endpoints/tests/integration_tests/conftest.py
index 15671c7f..6cebcf67 100644
--- a/libs/ai-endpoints/tests/integration_tests/conftest.py
+++ b/libs/ai-endpoints/tests/integration_tests/conftest.py
@@ -10,6 +10,18 @@
NVIDIARerank,
)
from langchain_nvidia_ai_endpoints._statics import MODEL_TABLE, Model
+from langchain_nvidia_ai_endpoints.chat_models import (
+ _DEFAULT_MODEL_NAME as DEFAULT_CHAT_MODEL,
+)
+from langchain_nvidia_ai_endpoints.embeddings import (
+ _DEFAULT_MODEL_NAME as DEFAULT_EMBEDDINGS_MODEL,
+)
+from langchain_nvidia_ai_endpoints.llm import (
+ _DEFAULT_MODEL_NAME as DEFAULT_COMPLETIONS_MODEL,
+)
+from langchain_nvidia_ai_endpoints.reranking import (
+ _DEFAULT_MODEL_NAME as DEFAULT_RERANKING_MODEL,
+)
def get_mode(config: pytest.Config) -> dict:
@@ -87,7 +99,7 @@ def get_all_known_models() -> List[Model]:
return list(MODEL_TABLE.values())
if "chat_model" in metafunc.fixturenames:
- models = [ChatNVIDIA._default_model_name]
+ models = [DEFAULT_CHAT_MODEL]
if model_list := metafunc.config.getoption("chat_model_id"):
models = model_list
if metafunc.config.getoption("all_models"):
@@ -111,7 +123,7 @@ def get_all_known_models() -> List[Model]:
metafunc.parametrize("tool_model", models, ids=models)
if "completions_model" in metafunc.fixturenames:
- models = [NVIDIA._default_model_name]
+ models = [DEFAULT_COMPLETIONS_MODEL]
if model_list := metafunc.config.getoption("completions_model_id"):
models = model_list
if metafunc.config.getoption("all_models"):
@@ -135,7 +147,7 @@ def get_all_known_models() -> List[Model]:
metafunc.parametrize("structured_model", models, ids=models)
if "rerank_model" in metafunc.fixturenames:
- models = [NVIDIARerank._default_model_name]
+ models = [DEFAULT_RERANKING_MODEL]
if model_list := metafunc.config.getoption("rerank_model_id"):
models = model_list
if metafunc.config.getoption("all_models"):
@@ -150,7 +162,7 @@ def get_all_known_models() -> List[Model]:
models = [
model.id
for model in get_all_known_models()
- if model.model_type == "vlm"
+ if model.model_type in {"vlm", "nv-vlm"}
]
metafunc.parametrize("vlm_model", models, ids=models)
@@ -167,7 +179,7 @@ def get_all_known_models() -> List[Model]:
metafunc.parametrize("qa_model", models, ids=models)
if "embedding_model" in metafunc.fixturenames:
- models = [NVIDIAEmbeddings._default_model_name]
+ models = [DEFAULT_EMBEDDINGS_MODEL]
if metafunc.config.getoption("all_models"):
models = [model.id for model in NVIDIAEmbeddings(**mode).available_models]
if model_list := metafunc.config.getoption("embedding_model_id"):
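
Reviewer note: the conftest aliases the four identically named `_DEFAULT_MODEL_NAME` constants apart at import time, then feeds them into the parametrization logic. A hedged sketch of that pattern (the `chat_model_id` option is assumed to be registered elsewhere via `pytest_addoption`):

```python
import pytest

_DEFAULT_MODEL = "example/default-model"  # placeholder, not a real model id


def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
    # Start from the module-level default, then let CLI options override it.
    if "chat_model" in metafunc.fixturenames:
        models = [_DEFAULT_MODEL]
        if model_list := metafunc.config.getoption("chat_model_id", default=None):
            models = model_list
        metafunc.parametrize("chat_model", models, ids=models)
```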
diff --git a/libs/ai-endpoints/tests/integration_tests/test_bind_tools.py b/libs/ai-endpoints/tests/integration_tests/test_bind_tools.py
index c9a84b69..aaee110a 100644
--- a/libs/ai-endpoints/tests/integration_tests/test_bind_tools.py
+++ b/libs/ai-endpoints/tests/integration_tests/test_bind_tools.py
@@ -1,5 +1,7 @@
import json
import warnings
+from functools import reduce
+from operator import add
from typing import Any, Callable, List, Literal, Optional, Union
import pytest
@@ -9,8 +11,8 @@
BaseMessage,
BaseMessageChunk,
)
-from langchain_core.pydantic_v1 import Field
from langchain_core.tools import tool
+from pydantic import Field
from langchain_nvidia_ai_endpoints import ChatNVIDIA
@@ -736,3 +738,52 @@ def test_accuracy_parallel_tool_calls_easy(
tool_call1 = response.tool_calls[1]
assert tool_call1["name"] == "get_current_weather"
assert tool_call1["args"]["location"] in valid_args
+
+
+@pytest.mark.xfail(reason="Server producing invalid response")
+def test_stream_usage_metadata(
+ tool_model: str,
+ mode: dict,
+) -> None:
+ """
+ This is a regression test for the server. The server was returning
+    usage metadata multiple times, resulting in incorrect aggregate
+ usage data.
+
+ We use invoke to get the baseline usage metadata and then compare
+ the usage metadata from the stream to the baseline.
+ """
+
+ @tool
+ def magic(
+ num: int = Field(..., description="Number to magic"),
+ ) -> int:
+ """Magic a number"""
+ return (num**num) % num
+
+ prompt = "What is magic(42)?"
+ llm = ChatNVIDIA(model=tool_model, **mode).bind_tools(
+ [magic], tool_choice="required"
+ )
+ baseline = llm.invoke(prompt)
+ assert isinstance(baseline, AIMessage)
+ assert baseline.usage_metadata is not None
+ baseline_in, baseline_out, baseline_total = (
+ baseline.usage_metadata["input_tokens"],
+ baseline.usage_metadata["output_tokens"],
+ baseline.usage_metadata["total_tokens"],
+ )
+ assert baseline_in + baseline_out == baseline_total
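+ # adding AIMessageChunks merges their content and accumulates their
+ # usage_metadata, so the reduced message reflects the entire stream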
+ response = reduce(add, llm.stream(prompt))
+ assert isinstance(response, AIMessage)
+ assert response.usage_metadata is not None
+ tolerance = 1.25 # allow for streaming to be 25% higher than invoke
+ response_in, response_out, response_total = (
+ response.usage_metadata["input_tokens"],
+ response.usage_metadata["output_tokens"],
+ response.usage_metadata["total_tokens"],
+ )
+ assert response_in + response_out == response_total
+ assert response_in < baseline_in * tolerance
+ assert response_out < baseline_out * tolerance
+ assert response_total < baseline_total * tolerance
diff --git a/libs/ai-endpoints/tests/integration_tests/test_embeddings.py b/libs/ai-endpoints/tests/integration_tests/test_embeddings.py
index 23733e25..998d5ade 100644
--- a/libs/ai-endpoints/tests/integration_tests/test_embeddings.py
+++ b/libs/ai-endpoints/tests/integration_tests/test_embeddings.py
@@ -6,6 +6,7 @@
import pytest
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
+from langchain_nvidia_ai_endpoints.embeddings import _DEFAULT_BATCH_SIZE
def test_embed_query(embedding_model: str, mode: dict) -> None:
@@ -62,7 +63,7 @@ def test_embed_query_long_text(embedding_model: str, mode: dict) -> None:
def test_embed_documents_batched_texts(embedding_model: str, mode: dict) -> None:
embedding = NVIDIAEmbeddings(model=embedding_model, **mode)
- count = NVIDIAEmbeddings._default_max_batch_size * 2 + 1
+ count = _DEFAULT_BATCH_SIZE * 2 + 1
texts = ["nvidia " * 32] * count
output = embedding.embed_documents(texts)
assert len(output) == count
@@ -73,7 +74,7 @@ def test_embed_documents_mixed_long_texts(embedding_model: str, mode: dict) -> N
if embedding_model in ["playground_nvolveqa_40k", "nvolveqa_40k"]:
pytest.skip("Skip test for nvolveqa-40k due to compat override of truncate")
embedding = NVIDIAEmbeddings(model=embedding_model, **mode)
- count = NVIDIAEmbeddings._default_max_batch_size * 2 - 1
+ count = _DEFAULT_BATCH_SIZE * 2 - 1
texts = ["nvidia " * 32] * count
texts[len(texts) // 2] = "nvidia " * 10240
with pytest.raises(Exception):
diff --git a/libs/ai-endpoints/tests/integration_tests/test_ranking.py b/libs/ai-endpoints/tests/integration_tests/test_ranking.py
index 06f9444b..47fc8438 100644
--- a/libs/ai-endpoints/tests/integration_tests/test_ranking.py
+++ b/libs/ai-endpoints/tests/integration_tests/test_ranking.py
@@ -66,11 +66,11 @@ def test_langchain_reranker_direct_empty_docs(
def test_langchain_reranker_direct_top_n_negative(
query: str, documents: List[Document], rerank_model: str, mode: dict
) -> None:
- orig = NVIDIARerank.Config.validate_assignment
- NVIDIARerank.Config.validate_assignment = False
+ orig = NVIDIARerank.model_config["validate_assignment"]
+ NVIDIARerank.model_config["validate_assignment"] = False
ranker = NVIDIARerank(model=rerank_model, **mode)
ranker.top_n = -100
- NVIDIARerank.Config.validate_assignment = orig
+ NVIDIARerank.model_config["validate_assignment"] = orig
result_docs = ranker.compress_documents(documents=documents, query=query)
assert len(result_docs) == 0
diff --git a/libs/ai-endpoints/tests/integration_tests/test_standard.py b/libs/ai-endpoints/tests/integration_tests/test_standard.py
new file mode 100644
index 00000000..983124c0
--- /dev/null
+++ b/libs/ai-endpoints/tests/integration_tests/test_standard.py
@@ -0,0 +1,23 @@
+"""Standard LangChain interface tests"""
+
+from typing import Type
+
+import pytest
+from langchain_core.language_models import BaseChatModel
+from langchain_standard_tests.integration_tests import ChatModelIntegrationTests
+
+from langchain_nvidia_ai_endpoints import ChatNVIDIA
+
+
+class TestNVIDIAStandard(ChatModelIntegrationTests):
+ @property
+ def chat_model_class(self) -> Type[BaseChatModel]:
+ return ChatNVIDIA
+
+ @property
+ def chat_model_params(self) -> dict:
+ return {"model": "meta/llama-3.1-8b-instruct"}
+
+ @pytest.mark.xfail(reason="anthropic-style list content not supported")
+ def test_tool_message_histories_list_content(self, model: BaseChatModel) -> None:
+ return super().test_tool_message_histories_list_content(model)
diff --git a/libs/ai-endpoints/tests/integration_tests/test_structured_output.py b/libs/ai-endpoints/tests/integration_tests/test_structured_output.py
index 3f4f5aa8..1f059e75 100644
--- a/libs/ai-endpoints/tests/integration_tests/test_structured_output.py
+++ b/libs/ai-endpoints/tests/integration_tests/test_structured_output.py
@@ -3,7 +3,7 @@
import pytest
from langchain_core.messages import HumanMessage
-from langchain_core.pydantic_v1 import BaseModel, Field
+from pydantic import BaseModel, Field
from langchain_nvidia_ai_endpoints import ChatNVIDIA
diff --git a/libs/ai-endpoints/tests/integration_tests/test_vlm_models.py b/libs/ai-endpoints/tests/integration_tests/test_vlm_models.py
index 260073c6..b261b4ee 100644
--- a/libs/ai-endpoints/tests/integration_tests/test_vlm_models.py
+++ b/libs/ai-endpoints/tests/integration_tests/test_vlm_models.py
@@ -1,26 +1,34 @@
import base64
+import os
from typing import Any, Dict, List, Union
import pytest
+import requests
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_nvidia_ai_endpoints.chat_models import ChatNVIDIA
-# todo: test S3 bucket asset id
-# todo: sizes
-# todo: formats
-# todo: multiple images
# todo: multiple texts
-# todo: detail (fidelity)
+# todo: accuracy tests
+#
+# API Specification -
+#
+# - User message may contain 1 or more image_url
+# - image_url contains a url and optional detail
+# - detail is one of "low", "high" or "auto" (default)
+# - url is either a url to an image or base64 encoded image
+# - format for base64 is "data:image/{type};base64,..."
+# - supported image types are png, jpeg (or jpg), webp, gif (non-animated)
+#
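+# example user message content following the spec above (values illustrative):
+# [{"type": "image_url",
+#   "image_url": {"url": "data:image/png;base64,...", "detail": "auto"}}]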
#
# note: differences between api catalog and openai api
-# - openai api supports server-side image download, api catalog does not
+# - openai api supports server-side image download; api catalog support is inconsistent
# - ChatNVIDIA does client side download to simulate the same behavior
# - ChatNVIDIA will automatically read local files and convert them to base64
-# - openai api uses {"image_url": {"url": "..."}}
-# where api catalog uses {"image_url": "..."}
+# - openai api always uses {"image_url": {"url": "..."}}
+# where api catalog sometimes uses {"image_url": "..."}
#
@@ -40,7 +48,7 @@
{
"type": "image_url",
"image_url": {
- "url": f"""data:image/png;base64,{
+ "url": f"""data:image/jpg;base64,{
base64.b64encode(
open('tests/data/nvidia-picasso.jpg', 'rb').read()
).decode('utf-8')
@@ -48,16 +56,254 @@
},
}
],
+ f"""""",
],
- ids=["url", "file", "tag"],
+ ids=["url", "file", "data", "tag"],
+)
+@pytest.mark.parametrize(
+ "func",
+ ["invoke", "stream"],
+)
+def test_vlm_input_style(
+ vlm_model: str,
+ mode: dict,
+ func: str,
+ content: Union[str, List[Union[str, Dict[str, Any]]]],
+) -> None:
+ chat = ChatNVIDIA(model=vlm_model, **mode)
+ if func == "invoke":
+ response = chat.invoke([HumanMessage(content=content)])
+ assert isinstance(response, BaseMessage)
+ assert isinstance(response.content, str)
+ if func == "stream":
+ for token in chat.stream([HumanMessage(content=content)]):
+ assert isinstance(token.content, str)
+
+
+@pytest.mark.parametrize(
+ "detail",
+ ["low", "high", "auto", None],
+ ids=["low", "high", "auto", "none"],
)
-def test_vlm_model(
- vlm_model: str, mode: dict, content: Union[str, List[Union[str, Dict[Any, Any]]]]
+def test_vlm_detail_accepted(
+ vlm_model: str,
+ mode: dict,
+ detail: str,
) -> None:
chat = ChatNVIDIA(model=vlm_model, **mode)
- response = chat.invoke([HumanMessage(content=content)])
+ response = chat.invoke(
+ [
+ HumanMessage(
+ content=[
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "tests/data/nvidia-picasso.jpg",
+ "detail": detail,
+ },
+ }
+ ]
+ )
+ ]
+ )
assert isinstance(response, BaseMessage)
assert isinstance(response.content, str)
+ # assert "cat" in response.content.lower()
- for token in chat.stream([HumanMessage(content=content)]):
- assert isinstance(token.content, str)
+
+@pytest.mark.parametrize(
+ "img",
+ [
+ "tests/data/nvidia-picasso.jpg",
+ "tests/data/nvidia-picasso.png",
+ "tests/data/nvidia-picasso.webp",
+ "tests/data/nvidia-picasso.gif",
+ ],
+ ids=["jpg", "png", "webp", "gif"],
+)
+def test_vlm_image_type(
+ vlm_model: str,
+ mode: dict,
+ img: str,
+) -> None:
+ chat = ChatNVIDIA(model=vlm_model, **mode)
+ response = chat.invoke(
+ [
+ HumanMessage(
+ content=[
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": img,
+ },
+ }
+ ]
+ )
+ ]
+ )
+ assert isinstance(response, BaseMessage)
+ assert isinstance(response.content, str)
+
+
+def test_vlm_image_large(
+ vlm_model: str,
+ mode: dict,
+) -> None:
+ chat = ChatNVIDIA(model=vlm_model, **mode)
+ response = chat.invoke(
+ [
+ HumanMessage(
+ content=[
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "tests/data/nvidia-picasso-large.png",
+ },
+ }
+ ]
+ )
+ ]
+ )
+ assert isinstance(response, BaseMessage)
+ assert isinstance(response.content, str)
+
+
+def test_vlm_no_images(
+ vlm_model: str,
+ mode: dict,
+) -> None:
+ chat = ChatNVIDIA(model=vlm_model, **mode)
+ response = chat.invoke(
+ [HumanMessage(content="What is the capital of Massachusetts?")]
+ )
+ assert isinstance(response, BaseMessage)
+ assert isinstance(response.content, str)
+
+
+def test_vlm_two_images(
+ vlm_model: str,
+ mode: dict,
+) -> None:
+ chat = ChatNVIDIA(model=vlm_model, **mode)
+ response = chat.invoke(
+ [
+ HumanMessage(
+ content=[
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "tests/data/nvidia-picasso.jpg",
+ },
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "tests/data/nvidia-picasso.jpg",
+ },
+ },
+ ]
+ )
+ ]
+ )
+ assert isinstance(response, BaseMessage)
+ assert isinstance(response.content, str)
+
+
+@pytest.fixture(scope="session")
+def asset_id() -> str:
+ # create an asset following -
+ # https://docs.nvidia.com/cloud-functions/user-guide/latest/cloud-function/assets.html
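+ # flow: POST /v2/nvcf/assets to obtain an assetId and a presigned
+ # uploadUrl, then PUT the image bytes to that uploadUrl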
+
+ def create_asset_and_get_upload_url(
+ token: str, content_type: str, description: str
+ ) -> dict:
+ url = "https://api.nvcf.nvidia.com/v2/nvcf/assets"
+ headers = {
+ "Authorization": f"Bearer {token}",
+ "accept": "application/json",
+ "Content-Type": "application/json",
+ }
+ data = {"contentType": content_type, "description": description}
+ response = requests.post(url, headers=headers, json=data)
+ response.raise_for_status()
+ return response.json()
+
+ def upload_image_to_presigned_url(
+ image_path: str, upload_url: str, content_type: str, description: str
+ ) -> None:
+ headers = {
+ "Content-Type": content_type,
+ "x-amz-meta-nvcf-asset-description": description,
+ }
+ with open(image_path, "rb") as image_file:
+ response = requests.put(upload_url, headers=headers, data=image_file)
+ response.raise_for_status()
+
+ content_type = "image/jpg"
+ description = "lc-nv-ai-e-test-nvidia-picasso"
+
+ asset_info = create_asset_and_get_upload_url(
+ os.environ["NVIDIA_API_KEY"], content_type, description
+ )
+ asset_id = asset_info["assetId"]
+
+ upload_image_to_presigned_url(
+ "tests/data/nvidia-picasso.jpg",
+ asset_info["uploadUrl"],
+ content_type,
+ description,
+ )
+
+ return asset_id
+
+
+@pytest.mark.parametrize(
+ "content",
+ [
+ [
+ {
+ "type": "image_url",
+ "image_url": {"url": "data:image/jpg;asset_id,{asset_id}"},
+ }
+ ],
+ [
+ """""",
+ ],
+ """""",
+ ],
+ ids=["data", "list-of-tag", "tag"],
+)
+@pytest.mark.parametrize(
+ "func",
+ ["invoke", "stream"],
+)
+def test_vlm_asset_id(
+ vlm_model: str,
+ mode: dict,
+ content: Union[str, List[Union[str, Dict[str, Any]]]],
+ func: str,
+ asset_id: str,
+) -> None:
+ if isinstance(content, str):
+ content = content.format(asset_id=asset_id)
+ elif isinstance(content, list):
+ for i, item in enumerate(content):
+ if isinstance(item, str):
+ # rebinding the loop variable would not update the list,
+ # so write the formatted string back by index
+ content[i] = item.format(asset_id=asset_id)
+ elif isinstance(item, dict):
+ item["image_url"]["url"] = item["image_url"]["url"].format(
+ asset_id=asset_id
+ )
+
+ chat = ChatNVIDIA(model=vlm_model, **mode)
+ if func == "invoke":
+ response = chat.invoke([HumanMessage(content=content)])
+ assert isinstance(response, BaseMessage)
+ assert isinstance(response.content, str)
+ if func == "stream":
+ for token in chat.stream([HumanMessage(content=content)]):
+ assert isinstance(token.content, str)
diff --git a/libs/ai-endpoints/tests/unit_tests/test_202_polling.py b/libs/ai-endpoints/tests/unit_tests/test_202_polling.py
new file mode 100644
index 00000000..a3a5e644
--- /dev/null
+++ b/libs/ai-endpoints/tests/unit_tests/test_202_polling.py
@@ -0,0 +1,60 @@
+import warnings
+
+import requests_mock
+from langchain_core.messages import AIMessage
+
+from langchain_nvidia_ai_endpoints import ChatNVIDIA
+
+
+def test_polling_auth_header(
+ requests_mock: requests_mock.Mocker,
+ mock_model: str,
+) -> None:
+ infer_url = "https://integrate.api.nvidia.com/v1/chat/completions"
+ polling_url = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/test-request-id"
+
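+ # a 202 with an NVCF-REQID header means the result is not ready yet;
+ # the client must poll the status endpoint, reusing the same
+ # Authorization header, until the result arrives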
+ requests_mock.post(
+ infer_url, status_code=202, headers={"NVCF-REQID": "test-request-id"}, json={}
+ )
+
+ requests_mock.get(
+ polling_url,
+ status_code=200,
+ json={
+ "id": "mock-id",
+ "created": 1234567890,
+ "object": "chat.completion",
+ "model": mock_model,
+ "choices": [
+ {
+ "index": 0,
+ "message": {"role": "assistant", "content": "WORKED"},
+ }
+ ],
+ },
+ )
+
+ warnings.filterwarnings("ignore", r".*type is unknown and inference may fail.*")
+ client = ChatNVIDIA(model=mock_model, api_key="BOGUS")
+ response = client.invoke("IGNORED")
+
+ # expected behavior -
+ # - first a GET request to /v1/models to check the model exists
+ # - second a POST request to /v1/chat/completions
+ # - third a GET request to /v2/nvcf/pexec/status/test-request-id
+ # we want to check on the second and third requests
+
+ assert len(requests_mock.request_history) == 3
+
+ infer_request = requests_mock.request_history[-2]
+ assert infer_request.method == "POST"
+ assert infer_request.url == infer_url
+ assert infer_request.headers["Authorization"] == "Bearer BOGUS"
+
+ poll_request = requests_mock.request_history[-1]
+ assert poll_request.method == "GET"
+ assert poll_request.url == polling_url
+ assert poll_request.headers["Authorization"] == "Bearer BOGUS"
+
+ assert isinstance(response, AIMessage)
+ assert response.content == "WORKED"
diff --git a/libs/ai-endpoints/tests/unit_tests/test_api_key.py b/libs/ai-endpoints/tests/unit_tests/test_api_key.py
index 2564b05b..552c4703 100644
--- a/libs/ai-endpoints/tests/unit_tests/test_api_key.py
+++ b/libs/ai-endpoints/tests/unit_tests/test_api_key.py
@@ -3,7 +3,7 @@
from typing import Any, Generator
import pytest
-from langchain_core.pydantic_v1 import SecretStr
+from pydantic import SecretStr
from requests_mock import Mocker
diff --git a/libs/ai-endpoints/tests/unit_tests/test_base_url.py b/libs/ai-endpoints/tests/unit_tests/test_base_url.py
index 0baaca6c..fb9c513a 100644
--- a/libs/ai-endpoints/tests/unit_tests/test_base_url.py
+++ b/libs/ai-endpoints/tests/unit_tests/test_base_url.py
@@ -28,10 +28,9 @@ def mock_v1_local_models(requests_mock: Mocker) -> None:
def test_create_without_base_url(public_class: type) -> None:
with no_env_var("NVIDIA_BASE_URL"):
- assert (
- public_class(api_key="BOGUS").base_url
- == "https://integrate.api.nvidia.com/v1"
- )
+ x = public_class(api_key="BOGUS")
+ assert x.base_url == "https://integrate.api.nvidia.com/v1"
+ assert x._client.base_url == "https://integrate.api.nvidia.com/v1"
@pytest.mark.parametrize(
diff --git a/libs/ai-endpoints/tests/unit_tests/test_bind_tools.py b/libs/ai-endpoints/tests/unit_tests/test_bind_tools.py
index a8f044f9..175f80dc 100644
--- a/libs/ai-endpoints/tests/unit_tests/test_bind_tools.py
+++ b/libs/ai-endpoints/tests/unit_tests/test_bind_tools.py
@@ -2,7 +2,7 @@
import warnings
from functools import reduce
from operator import add
-from typing import Any, List
+from typing import Annotated, Any, List
import pytest
import requests_mock
@@ -13,8 +13,8 @@
HumanMessage,
ToolMessage,
)
-from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.tools import tool
+from pydantic import BaseModel, Field
from langchain_nvidia_ai_endpoints import ChatNVIDIA
@@ -32,7 +32,7 @@ class xxyyzz_cls(BaseModel):
@tool
-def xxyyzz_tool(
+def xxyyzz_tool_field(
a: int = Field(..., description="First number"),
b: int = Field(..., description="Second number"),
) -> int:
@@ -40,14 +40,24 @@ def xxyyzz_tool(
return 42
+@tool
+def xxyyzz_tool_annotated(
+ a: Annotated[int, "First number"],
+ b: Annotated[int, "Second number"],
+) -> int:
+ """xxyyzz two numbers"""
+ return 42
+
+
@pytest.mark.parametrize(
"tools, choice",
[
([xxyyzz_func], "xxyyzz_func"),
([xxyyzz_cls], "xxyyzz_cls"),
- ([xxyyzz_tool], "xxyyzz_tool"),
+ ([xxyyzz_tool_field], "xxyyzz_tool_field"),
+ ([xxyyzz_tool_annotated], "xxyyzz_tool_annotated"),
],
- ids=["func", "cls", "tool"],
+ ids=["func", "cls", "tool_field", "tool_annotated"],
)
def test_bind_tool_and_select(tools: Any, choice: str) -> None:
warnings.filterwarnings(
@@ -62,9 +72,10 @@ def test_bind_tool_and_select(tools: Any, choice: str) -> None:
([], "wrong"),
([xxyyzz_func], "wrong_xxyyzz_func"),
([xxyyzz_cls], "wrong_xxyyzz_cls"),
- ([xxyyzz_tool], "wrong_xxyyzz_tool"),
+ ([xxyyzz_tool_field], "wrong_xxyyzz_tool_field"),
+ ([xxyyzz_tool_annotated], "wrong_xxyyzz_tool_annotated"),
],
- ids=["empty", "func", "cls", "tool"],
+ ids=["empty", "func", "cls", "tool_field", "tool_annotated"],
)
def test_bind_tool_and_select_negative(tools: Any, choice: str) -> None:
warnings.filterwarnings(
@@ -163,7 +174,7 @@ def test_invoke_response_parsing(
r'"{\""',
r'"input\""',
r'"\":"',
- r"3",
+ r'"3"',
r'"}"',
],
[r'"{\"intput\": 3}"'],
@@ -185,6 +196,7 @@ def test_stream_response_parsing(
for argument in argument_chunks
],
'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"BOGUS","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}]}', # noqa: E501
+ 'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"BOGUS","choices":[],"usage":{"prompt_tokens":20,"total_tokens":42,"completion_tokens":22}}', # noqa: E501
"data: [DONE]",
]
),
@@ -260,3 +272,31 @@ def test_regression_ai_null_content(
assistant.content = None # type: ignore
llm.invoke([assistant])
llm.stream([assistant])
+
+
+def test_stream_usage_metadata(
+ requests_mock: requests_mock.Mocker,
+) -> None:
+ requests_mock.post(
+ "https://integrate.api.nvidia.com/v1/chat/completions",
+ text="\n\n".join(
+ [
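+ # simulated SSE stream: a role delta, tool-call argument deltas, a
+ # finish_reason chunk, then a final usage-only chunk with empty
+ # choices carrying the token counts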
+ r'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"BOGUS","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"role":"assistant","content":null},"logprobs":null,"finish_reason":null}]}', # noqa: E501
+ r'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"BOGUS","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"ID1","type":"function","function":{"name":"magic_function","arguments":""}}]},"logprobs":null,"finish_reason":null}]}', # noqa: E501
+ r'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"BOGUS","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"in"}}]},"logprobs":null,"finish_reason":null}]}', # noqa: E501
+ r'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"BOGUS","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"put\":"}}]},"logprobs":null,"finish_reason":null}]}', # noqa: E501
+ r'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"BOGUS","system_fingerprint":null,"usage":null,"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" 3}"}}]},"logprobs":null,"finish_reason":null}]}', # noqa: E501
+ r'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"BOGUS","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null}', # noqa: E501
+ r'data: {"id":"ID0","object":"chat.completion.chunk","created":1234567890,"model":"BOGUS","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":76,"completion_tokens":29,"total_tokens":105}}', # noqa: E501
+ r"data: [DONE]",
+ ]
+ ),
+ )
+
+ llm = ChatNVIDIA(api_key="BOGUS")
+ response = reduce(add, llm.stream("IGNORED"))
+ assert isinstance(response, AIMessage)
+ assert response.usage_metadata is not None
+ assert response.usage_metadata["input_tokens"] == 76
+ assert response.usage_metadata["output_tokens"] == 29
+ assert response.usage_metadata["total_tokens"] == 105
diff --git a/libs/ai-endpoints/tests/unit_tests/test_model.py b/libs/ai-endpoints/tests/unit_tests/test_model.py
index bf19b806..4ef37849 100644
--- a/libs/ai-endpoints/tests/unit_tests/test_model.py
+++ b/libs/ai-endpoints/tests/unit_tests/test_model.py
@@ -96,7 +96,7 @@ def test_aliases(alias: str, client: Any) -> None:
"""
with pytest.warns(UserWarning) as record:
x = client(model=alias, nvidia_api_key="a-bogus-key")
- assert x.model == x._client.model_name
+ assert x.model == x._client.mdl_name
assert isinstance(record[0].message, Warning)
assert "deprecated" in record[0].message.args[0]
@@ -163,7 +163,7 @@ def test_default_lora(public_class: type) -> None:
def test_default(public_class: type) -> None:
x = public_class(api_key="BOGUS")
- assert x.model == x._default_model_name
+ assert x.model is not None
@pytest.mark.parametrize(
diff --git a/libs/ai-endpoints/tests/unit_tests/test_register_model.py b/libs/ai-endpoints/tests/unit_tests/test_register_model.py
index 482d40dc..f87efa11 100644
--- a/libs/ai-endpoints/tests/unit_tests/test_register_model.py
+++ b/libs/ai-endpoints/tests/unit_tests/test_register_model.py
@@ -21,9 +21,9 @@
("vlm", "NVIDIAEmbeddings"),
("vlm", "NVIDIARerank"),
("vlm", "NVIDIA"),
- ("embeddings", "ChatNVIDIA"),
- ("embeddings", "NVIDIARerank"),
- ("embeddings", "NVIDIA"),
+ ("embedding", "ChatNVIDIA"),
+ ("embedding", "NVIDIARerank"),
+ ("embedding", "NVIDIA"),
("ranking", "ChatNVIDIA"),
("ranking", "NVIDIAEmbeddings"),
("ranking", "NVIDIA"),
diff --git a/libs/ai-endpoints/tests/unit_tests/test_standard.py b/libs/ai-endpoints/tests/unit_tests/test_standard.py
new file mode 100644
index 00000000..a08cae2a
--- /dev/null
+++ b/libs/ai-endpoints/tests/unit_tests/test_standard.py
@@ -0,0 +1,18 @@
+"""Standard LangChain interface tests"""
+
+from typing import Type
+
+from langchain_core.language_models import BaseChatModel
+from langchain_standard_tests.unit_tests import ChatModelUnitTests
+
+from langchain_nvidia_ai_endpoints import ChatNVIDIA
+
+
+class TestNVIDIAStandard(ChatModelUnitTests):
+ @property
+ def chat_model_class(self) -> Type[BaseChatModel]:
+ return ChatNVIDIA
+
+ @property
+ def chat_model_params(self) -> dict:
+ return {"model": "meta/llama-3.1-8b-instruct"}
diff --git a/libs/ai-endpoints/tests/unit_tests/test_structured_output.py b/libs/ai-endpoints/tests/unit_tests/test_structured_output.py
index 053b10b3..81e4d8eb 100644
--- a/libs/ai-endpoints/tests/unit_tests/test_structured_output.py
+++ b/libs/ai-endpoints/tests/unit_tests/test_structured_output.py
@@ -4,15 +4,14 @@
import pytest
import requests_mock
-from langchain_core.pydantic_v1 import BaseModel as lc_pydanticV1BaseModel
-from langchain_core.pydantic_v1 import Field
from pydantic import BaseModel as pydanticV2BaseModel # ignore: check_pydantic
+from pydantic import Field
from pydantic.v1 import BaseModel as pydanticV1BaseModel # ignore: check_pydantic
from langchain_nvidia_ai_endpoints import ChatNVIDIA
-class Joke(lc_pydanticV1BaseModel):
+class Joke(pydanticV2BaseModel):
"""Joke to tell user."""
setup: str = Field(description="The setup of the joke")
@@ -39,7 +38,7 @@ def test_include_raw() -> None:
with pytest.raises(NotImplementedError):
ChatNVIDIA(api_key="BOGUS").with_structured_output(
- Joke.schema(), include_raw=True
+ Joke.model_json_schema(), include_raw=True
)
@@ -145,11 +144,10 @@ def test_stream_enum_incomplete(
@pytest.mark.parametrize(
"pydanticBaseModel",
[
- lc_pydanticV1BaseModel,
pydanticV1BaseModel,
pydanticV2BaseModel,
],
- ids=["lc-pydantic-v1", "pydantic-v1", "pydantic-v2"],
+ ids=["pydantic-v1", "pydantic-v2"],
)
def test_pydantic_version(
requests_mock: requests_mock.Mocker,
@@ -185,6 +183,7 @@ def test_pydantic_version(
class Person(pydanticBaseModel): # type: ignore
name: str
+ warnings.filterwarnings("ignore", r".*not known to support structured output.*")
llm = ChatNVIDIA(api_key="BOGUS").with_structured_output(Person)
response = llm.invoke("This is ignored.")
assert isinstance(response, Person)
diff --git a/libs/ai-endpoints/tests/unit_tests/test_vlm_models.py b/libs/ai-endpoints/tests/unit_tests/test_vlm_models.py
new file mode 100644
index 00000000..edda88c6
--- /dev/null
+++ b/libs/ai-endpoints/tests/unit_tests/test_vlm_models.py
@@ -0,0 +1,61 @@
+from typing import Any, Dict, List, Union
+
+import pytest
+
+from langchain_nvidia_ai_endpoints.chat_models import _nv_vlm_get_asset_ids
+
+
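+# _nv_vlm_get_asset_ids takes message content (a string or a list of string
+# and dict parts) and returns the asset ids referenced via <img> tags or
+# "data:<mime>;asset_id,<id>" urls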
+@pytest.mark.parametrize(
+ "content, expected",
+ [
+ # Single asset ID in a string (double quotes)
+ ('<img src="data:image/png;asset_id,12345" />', ["12345"]),
+ # Multiple asset IDs in a string (double quotes)
+ (
+ (
+ '<img src="data:image/png;asset_id,12345" />'
+ '<img src="data:image/jpeg;asset_id,67890" />'
+ ),
+ ["12345", "67890"],
+ ),
+ # Single asset ID in list of strings (single quotes)
+ ([""], ["12345"]),
+ # Multiple asset IDs in list of strings (single quotes)
+ (
+ [
+ "",
+ "",
+ ],
+ ["12345", "67890"],
+ ),
+ # Single asset ID in a list of dictionaries
+ ([{"image_url": {"url": "data:image/png;asset_id,12345"}}], ["12345"]),
+ # Multiple asset IDs in a list of dictionaries
+ (
+ [
+ {"image_url": {"url": "data:image/png;asset_id,12345"}},
+ {"image_url": {"url": "data:image/jpeg;asset_id,67890"}},
+ ],
+ ["12345", "67890"],
+ ),
+ # No asset IDs present (double quotes)
+ ('<img src="https://example.com/image.png" />', []),
+ # No asset IDs present (single quotes)
+ ("", []),
+ ],
+ ids=[
+ "single_asset_id_string_double_quotes",
+ "multiple_asset_ids_string_double_quotes",
+ "single_asset_id_list_of_strings_single_quotes",
+ "multiple_asset_ids_list_of_strings_single_quotes",
+ "single_asset_id_list_of_dicts",
+ "multiple_asset_ids_list_of_dicts",
+ "no_asset_ids_double_quotes",
+ "no_asset_ids_single_quotes",
+ ],
+)
+def test_nv_vlm_get_asset_ids(
+ content: Union[str, List[Union[str, Dict[str, Any]]]], expected: List[str]
+) -> None:
+ result = _nv_vlm_get_asset_ids(content)
+ assert result == expected