diff --git a/README.md b/README.md
index 22bc2e4cd..c544f7cc7 100644
--- a/README.md
+++ b/README.md
@@ -89,7 +89,7 @@ python examples/inference/api_server_openai/query_http_requests.py
 
 # using OpenAI SDK
 # please install openai in current env by running: pip install openai>=1.0
-export OPENAI_API_BASE=http://localhost:8000/v1
+export OPENAI_BASE_URL=http://localhost:8000/v1
 export OPENAI_API_KEY="not_a_real_key"
 python examples/inference/api_server_openai/query_openai_sdk.py
 ```
diff --git a/docs/serve.md b/docs/serve.md
index 0611f60e1..2beed2b18 100644
--- a/docs/serve.md
+++ b/docs/serve.md
@@ -64,7 +64,7 @@ python examples/inference/api_server_openai/query_http_requests.py
 
 # using OpenAI SDK
 # please install openai in current env by running: pip install openai>=1.0
-export OPENAI_API_BASE=http://localhost:8000/v1
+export OPENAI_BASE_URL=http://localhost:8000/v1
 export OPENAI_API_KEY="not_a_real_key"
 python examples/inference/api_server_openai/query_openai_sdk.py
 ```
diff --git a/examples/inference/api_server_openai/query_http_requests.py b/examples/inference/api_server_openai/query_http_requests.py
index 234a62ebb..37487a761 100644
--- a/examples/inference/api_server_openai/query_http_requests.py
+++ b/examples/inference/api_server_openai/query_http_requests.py
@@ -67,7 +67,7 @@ response = s.post(url, json=body, proxies=proxies, stream=args.streaming_response)  # type: ignore
 
 for chunk in response.iter_lines(decode_unicode=True):
     try:
-        if chunk is not None:
+        if chunk is not None and chunk != "":
             if args.streaming_response:
                 # Get data from reponse chunk
                 chunk_data = chunk.split("data: ")[1]
diff --git a/examples/inference/api_server_openai/query_openai_sdk.py b/examples/inference/api_server_openai/query_openai_sdk.py
index 3eaa1f404..ed6622e12 100644
--- a/examples/inference/api_server_openai/query_openai_sdk.py
+++ b/examples/inference/api_server_openai/query_openai_sdk.py
@@ -58,4 +58,11 @@
     temperature=args.temperature,
     top_p=args.top_p,
 )
-print(chat_completion)
+if args.streaming_response:
+    for chunk in chat_completion:
+        content = chunk.choices[0].delta.content
+        if content is not None:
+            print(content, end="")
+    print("")
+else:
+    print(chat_completion)
diff --git a/inference/api_openai_backend/router_app.py b/inference/api_openai_backend/router_app.py
index f622e1275..7f511760f 100644
--- a/inference/api_openai_backend/router_app.py
+++ b/inference/api_openai_backend/router_app.py
@@ -108,7 +108,7 @@ async def _completions_wrapper(
                 logger.error(f"{subresult_dict['error']}")
                 all_results.pop()
                 had_error = True
-                yield "data: " + ModelResponse(**subresult_dict).json() + "\n"
+                yield "data: " + ModelResponse(**subresult_dict).json() + "\n\n"
                 # Return early in case of an error
                 break
             choices = [
@@ -125,7 +125,7 @@
             model=body.model,
             choices=choices,
             usage=usage,
-        ).json() + "\n"
+        ).json() + "\n\n"
         if had_error:
             # Return early in case of an error
             break
@@ -141,8 +141,8 @@
         model=body.model,
         choices=choices,
         usage=usage,
-    ).json() + "\n"
-    yield "data: [DONE]\n"
+    ).json() + "\n\n"
+    yield "data: [DONE]\n\n"
 
 
 async def _chat_completions_wrapper(
@@ -167,7 +167,7 @@
         model=body.model,
         choices=choices,
         usage=None,
-    ).json() + "\n"
+    ).json() + "\n\n"
 
     all_results = []
     async for results in generator:
@@ -182,7 +182,7 @@
                 subresult_dict["finish_reason"] = None
                 all_results.pop()
                 had_error = True
-                yield "data: " + ModelResponse(**subresult_dict).json() + "\n"
+                yield "data: " + ModelResponse(**subresult_dict).json() + "\n\n"
                 # Return early in case of an error
                 break
             else:
@@ -200,7 +200,7 @@
             model=body.model,
             choices=choices,
             usage=None,
-        ).json() + "\n"
+        ).json() + "\n\n"
         if had_error:
             # Return early in case of an error
             break
@@ -223,8 +223,8 @@
         model=body.model,
         choices=choices,
         usage=usage,
-    ).json() + "\n"
-    yield "data: [DONE]\n"
+    ).json() + "\n\n"
+    yield "data: [DONE]\n\n"
 
 
 class Router: