nvidia-trt[patch]: add TritonTensorRTLLM(verbose_client=False) (langchain-ai#16848)

- **Description:** adds a `verbose_client` flag to TritonTensorRTLLM
- **Issue:** none
- **Dependencies:** none
- **Twitter handle:**
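Only the test file is shown in the diff below; the wrapper change itself is not. As a rough sketch, the flag presumably reaches the underlying tritonclient.grpc.InferenceServerClient, whose verbose mode prints each RPC name to stdout (this class shape is illustrative, not the actual implementation):

```python
# Illustrative sketch only; the real TritonTensorRTLLM in langchain_nvidia_trt
# is a LangChain LLM class and may wire this differently.
import tritonclient.grpc as grpcclient


class TritonClientSketch:
    def __init__(
        self, server_url: str, model_name: str, verbose_client: bool = False
    ) -> None:
        self.model_name = model_name
        # verbose=True makes tritonclient log each RPC (e.g. "is_server_live")
        # to stdout; the unit tests below assert on exactly that output.
        self.client = grpcclient.InferenceServerClient(
            url=server_url, verbose=verbose_client
        )
```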
Showing 2 changed files with 31 additions and 1 deletion.
@@ -1,7 +1,33 @@

"""Test TritonTensorRT Chat API wrapper."""
import sys
from io import StringIO
from unittest.mock import patch

from langchain_nvidia_trt import TritonTensorRTLLM


def test_initialization() -> None:
    """Test integration initialization."""
    TritonTensorRTLLM(model_name="ensemble", server_url="http://localhost:8001")


# @patch injects the mocked gRPC stub as the first positional argument
# (named `ignore` here), so no live Triton server is needed.
@patch("tritonclient.grpc.service_pb2_grpc.GRPCInferenceServiceStub")
def test_default_verbose(ignore) -> None:
    # Default client is quiet: no RPC names are echoed to stdout.
    llm = TritonTensorRTLLM(server_url="http://localhost:8001", model_name="ensemble")
    captured = StringIO()
    sys.stdout = captured
    llm.client.is_server_live()
    sys.stdout = sys.__stdout__
    assert "is_server_live" not in captured.getvalue()


@patch("tritonclient.grpc.service_pb2_grpc.GRPCInferenceServiceStub")
def test_verbose(ignore) -> None:
    # With verbose_client=True, tritonclient echoes each RPC name to stdout.
    llm = TritonTensorRTLLM(
        server_url="http://localhost:8001", model_name="ensemble", verbose_client=True
    )
    captured = StringIO()
    sys.stdout = captured
    llm.client.is_server_live()
    sys.stdout = sys.__stdout__
    assert "is_server_live" in captured.getvalue()