From 62677d30accf34ff2ce3b7ddd3d59390aebe5dec Mon Sep 17 00:00:00 2001 From: Dennis Traub Date: Thu, 25 Apr 2024 17:06:56 +0200 Subject: [PATCH] Python, Java, JavaScript: Bedrock getting started examples for Llama 2 and 3 (#6375) Co-authored-by: Dennis Traub --- .../metadata/bedrock-runtime_metadata.yaml | 123 ++++++++++++++++-- .../example_code/bedrock-runtime/README.md | 5 +- .../config/foundation_models.js | 12 -- .../meta/llama2/invoke_model_quickstart.js | 54 ++++++++ ...e_model_with_response_stream_quickstart.js | 54 ++++++++ .../meta/llama3/invoke_model_quickstart.js | 59 +++++++++ ...e_model_with_response_stream_quickstart.js | 60 +++++++++ .../models/meta_llama2/llama2_chat.js | 67 ---------- .../tests/meta_llama.integration.test.js | 54 ++++++++ .../tests/meta_llama2.integration.test.js | 25 ---- javav2/example_code/bedrock-runtime/README.md | 9 +- .../BedrockRuntimeUsageDemo.java | 5 - .../example/bedrockruntime/InvokeModel.java | 47 ------- .../bedrockruntime/InvokeModelAsync.java | 61 --------- .../InvokeModelWithResponseStream.java | 3 + .../meta/llama2/InvokeModelQuickstart.java | 55 ++++++++ ...vokeModelWithResponseStreamQuickstart.java | 62 +++++++++ .../meta/llama3/InvokeModelQuickstart.java | 63 +++++++++ ...vokeModelWithResponseStreamQuickstart.java | 70 ++++++++++ .../src/test/java/TestLlama2.java | 26 ++++ .../src/test/java/TestLlama3.java | 26 ++++ .../src/test/java/TextToTextAsyncTest.java | 9 -- .../src/test/java/TextToTextSyncTest.java | 8 -- python/example_code/bedrock-runtime/README.md | 15 ++- .../bedrock_runtime_wrapper.py | 37 ------ .../meta/llama2/invoke_model_quickstart.py | 44 +++++++ ...e_model_with_response_stream_quickstart.py | 46 +++++++ .../meta/llama3/invoke_model_quickstart.py | 50 +++++++ ...e_model_with_response_stream_quickstart.py | 52 ++++++++ .../meta/test_invoke_model_quickstarts.py | 62 +++++++++ .../test/test_bedrock_runtime_wrapper.py | 21 --- python/test_tools/bedrock_runtime_stubber.py | 18 --- 32 files changed, 969 insertions(+), 333 deletions(-) create mode 100644 javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.js create mode 100644 javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.js create mode 100644 javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.js create mode 100644 javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.js delete mode 100644 javascriptv3/example_code/bedrock-runtime/models/meta_llama2/llama2_chat.js create mode 100644 javascriptv3/example_code/bedrock-runtime/tests/meta_llama.integration.test.js delete mode 100644 javascriptv3/example_code/bedrock-runtime/tests/meta_llama2.integration.test.js create mode 100644 javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelQuickstart.java create mode 100644 javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelWithResponseStreamQuickstart.java create mode 100644 javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelQuickstart.java create mode 100644 javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelWithResponseStreamQuickstart.java create mode 100644 javav2/example_code/bedrock-runtime/src/test/java/TestLlama2.java create mode 100644 
javav2/example_code/bedrock-runtime/src/test/java/TestLlama3.java create mode 100644 python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.py create mode 100644 python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.py create mode 100644 python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.py create mode 100644 python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.py create mode 100644 python/example_code/bedrock-runtime/test/models/meta/test_invoke_model_quickstarts.py diff --git a/.doc_gen/metadata/bedrock-runtime_metadata.yaml b/.doc_gen/metadata/bedrock-runtime_metadata.yaml index 88af4fc5bf9..071fcc361f7 100644 --- a/.doc_gen/metadata/bedrock-runtime_metadata.yaml +++ b/.doc_gen/metadata/bedrock-runtime_metadata.yaml @@ -519,10 +519,10 @@ bedrock-runtime_InvokeJurassic2: services: bedrock-runtime: {InvokeModel} -bedrock-runtime_InvokeLlama2: - title: Invoke the Meta Llama 2 Chat model on &BR; for text generation +bedrock-runtime_Llama2_InvokeLlama: + title: Invoke Meta Llama 2 on &BR; using Meta's native request and response payloads title_abbrev: "Meta Llama 2: Text generation" - synopsis: invoke the Meta Llama 2 Chat model on &BR; for text generation. + synopsis: get started sending prompts to Meta Llama 2 and printing the response. category: Invoke model examples languages: Go: @@ -538,20 +538,17 @@ bedrock-runtime_InvokeLlama2: - sdk_version: 2 github: javav2/example_code/bedrock-runtime excerpts: - - description: Asynchronously invoke the Meta Llama 2 Chat foundation model to generate text. + - description: Send your first prompt to Meta Llama 2. snippet_tags: - - bedrock-runtime.java2.invoke_llama2_async.main - - description: Invoke the Meta Llama 2 Chat foundation model to generate text. - snippet_tags: - - bedrock-runtime.java2.invoke_llama2.main + - bedrock-runtime.java2.InvokeModel_Llama2_Quickstart JavaScript: versions: - sdk_version: 3 github: javascriptv3/example_code/bedrock-runtime excerpts: - - description: Invoke the Meta Llama 2 Chat foundation model to generate text. - snippet_files: - - javascriptv3/example_code/bedrock-runtime/models/meta_llama2/llama2_chat.js + - description: Send your first prompt to Meta Llama 2. + snippet_tags: + - javascript.v3.bedrock-runtime.InvokeModel_Llama2_Quickstart PHP: versions: - sdk_version: 3 @@ -565,9 +562,9 @@ bedrock-runtime_InvokeLlama2: - sdk_version: 3 github: python/example_code/bedrock-runtime excerpts: - - description: Invoke the Meta Llama 2 Chat foundation model to generate text. + - description: Send your first prompt to Meta Llama 2. snippet_tags: - - python.example_code.bedrock-runtime.InvokeMetaLlama2 + - python.example_code.bedrock-runtime.InvokeModel_Llama2_Quickstart .NET: versions: - sdk_version: 3 @@ -579,6 +576,106 @@ bedrock-runtime_InvokeLlama2: services: bedrock-runtime: {InvokeModel} +bedrock-runtime_Llama2_InvokeModelWithResponseStream: + title: Invoke Meta Llama 2 on &BR; using Meta's native request and response payloads with a response stream + title_abbrev: "Meta Llama 2: Text generation with response stream" + synopsis: get started sending prompts to Meta Llama 2 and printing the response stream in real-time. + category: Invoke model examples + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 2. 
+ snippet_tags: + - bedrock-runtime.java2.InvokeModelWithResponseStream_Llama2_Quickstart JavaScript: + versions: + - sdk_version: 3 + github: javascriptv3/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 2. + snippet_tags: + - javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart Python: + versions: + - sdk_version: 3 + github: python/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 2. + snippet_tags: + - python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart services: + bedrock-runtime: {InvokeModelWithResponseStream} + + +bedrock-runtime_Llama3_InvokeLlama: + title: Invoke Meta Llama 3 on &BR; using Meta's native request and response payloads + title_abbrev: "Meta Llama 3: Text generation" + synopsis: get started sending prompts to Meta Llama 3 and printing the response. + category: Invoke model examples + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 3. + snippet_tags: + - bedrock-runtime.java2.InvokeModel_Llama3_Quickstart + JavaScript: + versions: + - sdk_version: 3 + github: javascriptv3/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 3. + snippet_tags: + - javascript.v3.bedrock-runtime.InvokeModel_Llama3_Quickstart + Python: + versions: + - sdk_version: 3 + github: python/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 3. + snippet_tags: + - python.example_code.bedrock-runtime.InvokeModel_Llama3_Quickstart + services: + bedrock-runtime: {InvokeModel} + +bedrock-runtime_Llama3_InvokeModelWithResponseStream: + title: Invoke Meta Llama 3 on &BR; using Meta's native request and response payloads with a response stream + title_abbrev: "Meta Llama 3: Text generation with response stream" + synopsis: get started sending prompts to Meta Llama 3 and printing the response stream in real-time. + category: Invoke model examples + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 3. + snippet_tags: + - bedrock-runtime.java2.InvokeModelWithResponseStream_Llama3_Quickstart + JavaScript: + versions: + - sdk_version: 3 + github: javascriptv3/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 3. + snippet_tags: + - javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart + Python: + versions: + - sdk_version: 3 + github: python/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 3. + snippet_tags: + - python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart + services: + bedrock-runtime: {InvokeModelWithResponseStream} + bedrock-runtime_Scenario_InvokeModels: title: Invoke various foundation models on &BR; title_abbrev: Invoke multiple foundation models on &BR; diff --git a/javascriptv3/example_code/bedrock-runtime/README.md b/javascriptv3/example_code/bedrock-runtime/README.md index 6ba29e94559..234ae1ec4da 100644 --- a/javascriptv3/example_code/bedrock-runtime/README.md +++ b/javascriptv3/example_code/bedrock-runtime/README.md @@ -51,7 +51,10 @@ functions within the same service. 
- [Anthropic Claude 2: Text generation](models/anthropic_claude/claude_2.js) - [Anthropic Claude 3: Text generation](models/anthropic_claude/claude_3.js) - [Anthropic Claude Instant: Text generation](models/anthropic_claude/claude_instant_1.js) -- [Meta Llama 2: Text generation](models/meta_llama2/llama2_chat.js) +- [Meta Llama 2: Text generation](models/meta/llama2/invoke_model_quickstart.js#L4) +- [Meta Llama 2: Text generation with response stream](models/meta/llama2/invoke_model_with_response_stream_quickstart.js#L4) +- [Meta Llama 3: Text generation](models/meta/llama3/invoke_model_quickstart.js#L4) +- [Meta Llama 3: Text generation with response stream](models/meta/llama3/invoke_model_with_response_stream_quickstart.js#L4) - [Mistral AI: Text generation with Mistral 7B Instruct](models/mistral_ai/mistral_7b.js) - [Mistral AI: Text generation with Mixtral 8x7B Instruct](models/mistral_ai/mixtral_8x7b.js) diff --git a/javascriptv3/example_code/bedrock-runtime/config/foundation_models.js b/javascriptv3/example_code/bedrock-runtime/config/foundation_models.js index a695e16774e..1dc2c5e7509 100644 --- a/javascriptv3/example_code/bedrock-runtime/config/foundation_models.js +++ b/javascriptv3/example_code/bedrock-runtime/config/foundation_models.js @@ -50,18 +50,6 @@ export const FoundationModels = Object.freeze({ module: () => import("../models/ai21_labs_jurassic2/jurassic2.js"), invoker: (/** @type {Module} */ module) => module.invokeModel, }, - LLAMA2_CHAT_13B: { - modelId: "meta.llama2-13b-chat-v1", - modelName: "Llama 2 Chat 13B", - module: () => import("../models/meta_llama2/llama2_chat.js"), - invoker: (/** @type {Module} */ module) => module.invokeModel, - }, - LLAMA2_CHAT_70B: { - modelId: "meta.llama2-70b-chat-v1", - modelName: "Llama 2 Chat 70B", - module: () => import("../models/meta_llama2/llama2_chat.js"), - invoker: (/** @type {Module} */ module) => module.invokeModel, - }, MISTRAL_7B: { modelId: "mistral.mistral-7b-instruct-v0:2", modelName: "Mistral 7B Instruct", diff --git a/javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.js b/javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.js new file mode 100644 index 00000000000..210aa789188 --- /dev/null +++ b/javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.js @@ -0,0 +1,54 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +// snippet-start:[javascript.v3.bedrock-runtime.InvokeModel_Llama2_Quickstart] +// Send a prompt to Meta Llama 2 and print the response. + +import { + BedrockRuntimeClient, + InvokeModelCommand, +} from "@aws-sdk/client-bedrock-runtime"; + +// Create a Bedrock Runtime client in the AWS Region of your choice. +const client = new BedrockRuntimeClient({ region: "us-west-2" }); + +// Set the model ID, e.g., Llama 2 Chat 13B. +const modelId = "meta.llama2-13b-chat-v1"; + +// Define the user message to send. +const userMessage = + "Describe the purpose of a 'hello world' program in one sentence."; + +// Embed the message in Llama 2's prompt format. +const prompt = `[INST] ${userMessage} [/INST]`; + +// Format the request payload using the model's native structure. +const request = { + prompt, + // Optional inference parameters: + max_gen_len: 512, + temperature: 0.5, + top_p: 0.9, +}; + +// Encode and send the request. 
+const response = await client.send( + new InvokeModelCommand({ + contentType: "application/json", + body: JSON.stringify(request), + modelId, + }), +); + +// Decode the native response body. +/** @type {{ generation: string }} */ +const nativeResponse = JSON.parse(new TextDecoder().decode(response.body)); + +// Extract and print the generated text. +const responseText = nativeResponse.generation; +console.log(responseText); + +// Learn more about the Llama 2 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2 + +// snippet-end:[javascript.v3.bedrock-runtime.InvokeModel_Llama2_Quickstart] diff --git a/javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.js b/javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.js new file mode 100644 index 00000000000..91012dc0c66 --- /dev/null +++ b/javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.js @@ -0,0 +1,54 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +// snippet-start:[javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart] +// Send a prompt to Meta Llama 2 and print the response stream in real-time. + +import { + BedrockRuntimeClient, + InvokeModelWithResponseStreamCommand, +} from "@aws-sdk/client-bedrock-runtime"; + +// Create a Bedrock Runtime client in the AWS Region of your choice. +const client = new BedrockRuntimeClient({ region: "us-west-2" }); + +// Set the model ID, e.g., Llama 2 Chat 13B. +const modelId = "meta.llama2-13b-chat-v1"; + +// Define the user message to send. +const userMessage = + "Describe the purpose of a 'hello world' program in one sentence."; + +// Embed the message in Llama 2's prompt format. +const prompt = `[INST] ${userMessage} [/INST]`; + +// Format the request payload using the model's native structure. +const request = { + prompt, + // Optional inference parameters: + max_gen_len: 512, + temperature: 0.5, + top_p: 0.9, +}; + +// Encode and send the request. +const responseStream = await client.send( + new InvokeModelWithResponseStreamCommand({ + contentType: "application/json", + body: JSON.stringify(request), + modelId, + }), +); + +// Extract and print the response stream in real-time. +for await (const event of responseStream.body) { + /** @type {{ generation: string }} */ + const chunk = JSON.parse(new TextDecoder().decode(event.chunk.bytes)); + if (chunk.generation) { + process.stdout.write(chunk.generation); + } +} + +// Learn more about the Llama 2 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2 +// snippet-end:[javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart] diff --git a/javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.js b/javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.js new file mode 100644 index 00000000000..5b5abc3a840 --- /dev/null +++ b/javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.js @@ -0,0 +1,59 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +// snippet-start:[javascript.v3.bedrock-runtime.InvokeModel_Llama3_Quickstart] +// Send a prompt to Meta Llama 3 and print the response. 
+ +import { + BedrockRuntimeClient, + InvokeModelCommand, +} from "@aws-sdk/client-bedrock-runtime"; + +// Create a Bedrock Runtime client in the AWS Region of your choice. +const client = new BedrockRuntimeClient({ region: "us-west-2" }); + +// Set the model ID, e.g., Llama 3 8B Instruct. +const modelId = "meta.llama3-8b-instruct-v1:0"; + +// Define the user message to send. +const userMessage = + "Describe the purpose of a 'hello world' program in one sentence."; + +// Embed the message in Llama 3's prompt format. +const prompt = ` +<|begin_of_text|> +<|start_header_id|>user<|end_header_id|> +${userMessage} +<|eot_id|> +<|start_header_id|>assistant<|end_header_id|> +`; + +// Format the request payload using the model's native structure. +const request = { + prompt, + // Optional inference parameters: + max_gen_len: 512, + temperature: 0.5, + top_p: 0.9, +}; + +// Encode and send the request. +const response = await client.send( + new InvokeModelCommand({ + contentType: "application/json", + body: JSON.stringify(request), + modelId, + }), +); + +// Decode the native response body. +/** @type {{ generation: string }} */ +const nativeResponse = JSON.parse(new TextDecoder().decode(response.body)); + +// Extract and print the generated text. +const responseText = nativeResponse.generation; +console.log(responseText); + +// Learn more about the Llama 3 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3 +// snippet-end:[javascript.v3.bedrock-runtime.InvokeModel_Llama3_Quickstart] diff --git a/javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.js b/javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.js new file mode 100644 index 00000000000..daa55bb7c21 --- /dev/null +++ b/javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.js @@ -0,0 +1,60 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +// snippet-start:[javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart] +// Send a prompt to Meta Llama 3 and print the response stream in real-time. + +import { + BedrockRuntimeClient, + InvokeModelWithResponseStreamCommand, +} from "@aws-sdk/client-bedrock-runtime"; + +// Create a Bedrock Runtime client in the AWS Region of your choice. +const client = new BedrockRuntimeClient({ region: "us-west-2" }); + +// Set the model ID, e.g., Llama 3 8B Instruct. +const modelId = "meta.llama3-8b-instruct-v1:0"; + +// Define the user message to send. +const userMessage = + "Describe the purpose of a 'hello world' program in one sentence."; + +// Embed the message in Llama 3's prompt format. +const prompt = ` +<|begin_of_text|> +<|start_header_id|>user<|end_header_id|> +${userMessage} +<|eot_id|> +<|start_header_id|>assistant<|end_header_id|> +`; + +// Format the request payload using the model's native structure. +const request = { + prompt, + // Optional inference parameters: + max_gen_len: 512, + temperature: 0.5, + top_p: 0.9, +}; + +// Encode and send the request. +const responseStream = await client.send( + new InvokeModelWithResponseStreamCommand({ + contentType: "application/json", + body: JSON.stringify(request), + modelId, + }), +); + +// Extract and print the response stream in real-time. 
+for await (const event of responseStream.body) { + /** @type {{ generation: string }} */ + const chunk = JSON.parse(new TextDecoder().decode(event.chunk.bytes)); + if (chunk.generation) { + process.stdout.write(chunk.generation); + } +} + +// Learn more about the Llama 3 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3 +// snippet-end:[javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart] diff --git a/javascriptv3/example_code/bedrock-runtime/models/meta_llama2/llama2_chat.js b/javascriptv3/example_code/bedrock-runtime/models/meta_llama2/llama2_chat.js deleted file mode 100644 index 36210000d17..00000000000 --- a/javascriptv3/example_code/bedrock-runtime/models/meta_llama2/llama2_chat.js +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -import { fileURLToPath } from "url"; - -import { FoundationModels } from "../../config/foundation_models.js"; -import { - BedrockRuntimeClient, - InvokeModelCommand, -} from "@aws-sdk/client-bedrock-runtime"; - -/** - * @typedef {Object} ResponseBody - * @property {generation} text - */ - -/** - * Invokes a Meta Llama 2 Chat model. - * - * @param {string} prompt - The input text prompt for the model to complete. - * @param {string} [modelId] - The ID of the model to use. Defaults to "meta.llama2-13b-chat-v1". - */ -export const invokeModel = async ( - prompt, - modelId = "meta.llama2-13b-chat-v1", -) => { - // Create a new Bedrock Runtime client instance. - const client = new BedrockRuntimeClient({ region: "us-east-1" }); - - // Prepare the payload for the model. - const payload = { - prompt, - temperature: 0.5, - max_gen_len: 1000, - }; - - // Invoke the model with the payload and wait for the response. - const command = new InvokeModelCommand({ - contentType: "application/json", - body: JSON.stringify(payload), - modelId, - }); - const apiResponse = await client.send(command); - - // Decode and return the response. - const decodedResponseBody = new TextDecoder().decode(apiResponse.body); - /** @type {ResponseBody} */ - const responseBody = JSON.parse(decodedResponseBody); - return responseBody.generation; -}; - -// Invoke the function if this file was run directly. -if (process.argv[1] === fileURLToPath(import.meta.url)) { - const prompt = - 'Complete the following in one sentence: "Once upon a time..."'; - const modelId = FoundationModels.LLAMA2_CHAT_13B.modelId; - console.log(`Prompt: ${prompt}`); - console.log(`Model ID: ${modelId}`); - - try { - console.log("-".repeat(53)); - const response = await invokeModel(prompt, modelId); - console.log(response); - } catch (err) { - console.log(err); - } -} diff --git a/javascriptv3/example_code/bedrock-runtime/tests/meta_llama.integration.test.js b/javascriptv3/example_code/bedrock-runtime/tests/meta_llama.integration.test.js new file mode 100644 index 00000000000..f522cfec44d --- /dev/null +++ b/javascriptv3/example_code/bedrock-runtime/tests/meta_llama.integration.test.js @@ -0,0 +1,54 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { vi, describe, it, expect } from "vitest"; + +/** + * Integration tests for: + * - models/meta/llama2/*.js + * - models/meta/llama3/*.js + */ + +describe("Running the Llama2 InvokeModel quickstart", () => { + it("should run and log the model's response", async () => { + const log = vi.spyOn(console, "log").mockImplementation(() => {}); + await import("../models/meta/llama2/invoke_model_quickstart.js"); + expect(log).toHaveBeenCalledTimes(1); + log.mockRestore(); + }); +}); + +describe("Running the Llama2 InvokeModelWithResponseStream quickstart", () => { + it("should run and log the model's response", async () => { + const write = vi + .spyOn(process.stdout, "write") + .mockImplementation(() => {}); + await import( + "../models/meta/llama2/invoke_model_with_response_stream_quickstart.js" + ); + expect(write).toHaveBeenCalled(); + write.mockRestore(); + }); +}); + +describe("Running the Llama3 InvokeModel quickstart", () => { + it("should run and log the model's response", async () => { + const log = vi.spyOn(console, "log").mockImplementation(() => {}); + await import("../models/meta/llama3/invoke_model_quickstart.js"); + expect(log).toHaveBeenCalledTimes(1); + log.mockRestore(); + }); +}); + +describe("Running the Llama3 InvokeModelWithResponseStream quickstart", () => { + it("should run and log the model's response", async () => { + const write = vi + .spyOn(process.stdout, "write") + .mockImplementation(() => {}); + await import( + "../models/meta/llama3/invoke_model_with_response_stream_quickstart.js" + ); + expect(write).toHaveBeenCalled(); + write.mockRestore(); + }); +}); diff --git a/javascriptv3/example_code/bedrock-runtime/tests/meta_llama2.integration.test.js b/javascriptv3/example_code/bedrock-runtime/tests/meta_llama2.integration.test.js deleted file mode 100644 index cd00ef0811e..00000000000 --- a/javascriptv3/example_code/bedrock-runtime/tests/meta_llama2.integration.test.js +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -import { describe, it } from "vitest"; -import { FoundationModels } from "../config/foundation_models.js"; -import { expectToBeANonEmptyString } from "./test_tools.js"; -import { invokeModel } from "../models/meta_llama2/llama2_chat.js"; - -const TEXT_PROMPT = "Hello, this is a test prompt"; - -describe("Invoke Llama2 Chat 13B", () => { - it("should return a response", async () => { - const modelId = FoundationModels.LLAMA2_CHAT_13B.modelId; - const response = await invokeModel(TEXT_PROMPT, modelId); - expectToBeANonEmptyString(response); - }); -}); - -describe("Invoke Llama2 Chat 70B", () => { - it("should return a response", async () => { - const modelId = FoundationModels.LLAMA2_CHAT_70B.modelId; - const response = await invokeModel(TEXT_PROMPT, modelId); - expectToBeANonEmptyString(response); - }); -}); diff --git a/javav2/example_code/bedrock-runtime/README.md b/javav2/example_code/bedrock-runtime/README.md index 0d012f2704b..2aceb3cda3f 100644 --- a/javav2/example_code/bedrock-runtime/README.md +++ b/javav2/example_code/bedrock-runtime/README.md @@ -41,14 +41,17 @@ functions within the same service. 
### Invoke model examples - [AI21 Labs Jurassic-2: Text generation](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L205) -- [Amazon Titan: Image generation](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L399) +- [Amazon Titan: Image generation](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L338) - [Anthropic Claude 2: Real-time response stream processing](src/main/java/com/example/bedrockruntime/Claude2.java#L65) - [Anthropic Claude 2: Text generation](src/main/java/com/example/bedrockruntime/InvokeModel.java#L112) - [Anthropic Claude 3: Real-time response stream processing](src/main/java/com/example/bedrockruntime/Claude3.java#L49) -- [Meta Llama 2: Text generation](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L268) +- [Meta Llama 2: Text generation](src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelQuickstart.java#L11) +- [Meta Llama 2: Text generation with response stream](src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelWithResponseStreamQuickstart.java#L12) +- [Meta Llama 3: Text generation](src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelQuickstart.java#L13) +- [Meta Llama 3: Text generation with response stream](src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelWithResponseStreamQuickstart.java#L14) - [Mistral AI: Text generation with Mistral 7B Instruct](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L33) - [Mistral AI: Text generation with Mixtral 8x7B Instruct](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L88) -- [Stable Diffusion: Image generation](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L329) +- [Stable Diffusion: Image generation](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L268) diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/BedrockRuntimeUsageDemo.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/BedrockRuntimeUsageDemo.java index f74ccd8ab80..0d28151d0ed 100644 --- a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/BedrockRuntimeUsageDemo.java +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/BedrockRuntimeUsageDemo.java @@ -27,7 +27,6 @@ public class BedrockRuntimeUsageDemo { private static final String CLAUDE = "anthropic.claude-v2"; private static final String JURASSIC2 = "ai21.j2-mid-v1"; - private static final String LLAMA2 = "meta.llama2-13b-chat-v1"; private static final String MISTRAL7B = "mistral.mistral-7b-instruct-v0:2"; private static final String MIXTRAL8X7B = "mistral.mixtral-8x7b-instruct-v0:1"; private static final String STABLE_DIFFUSION = "stability.stable-diffusion-xl"; @@ -44,7 +43,6 @@ private static void textToText() { String prompt = "In one sentence, what is a large-language model?"; BedrockRuntimeUsageDemo.invoke(CLAUDE, prompt); BedrockRuntimeUsageDemo.invoke(JURASSIC2, prompt); - BedrockRuntimeUsageDemo.invoke(LLAMA2, prompt); BedrockRuntimeUsageDemo.invoke(MISTRAL7B, prompt); BedrockRuntimeUsageDemo.invoke(MIXTRAL8X7B, prompt); } @@ -66,9 +64,6 @@ private static void invoke(String modelId, String prompt, String stylePreset) { case JURASSIC2: printResponse(invokeJurassic2(prompt)); break; - case LLAMA2: - printResponse(invokeLlama2(prompt)); - break; case MISTRAL7B: for (String response : invokeMistral7B(prompt)) { printResponse(response); diff --git 
a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModel.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModel.java index c25c0467e18..8204ffde3b2 100644 --- a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModel.java +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModel.java @@ -208,53 +208,6 @@ public static String invokeJurassic2(String prompt) { } // snippet-end:[bedrock-runtime.java2.invoke_jurassic2.main] - // snippet-start:[bedrock-runtime.java2.invoke_llama2.main] - /** - * Invokes the Meta Llama 2 Chat model to run an inference based on the provided - * input. - * - * @param prompt The prompt for Llama 2 to complete. - * @return The generated response. - */ - public static String invokeLlama2(String prompt) { - /* - * The different model providers have individual request and response formats. - * For the format, ranges, and default values for Meta Llama 2 Chat, refer to: - * https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-meta. - * html - */ - - String llama2ModelId = "meta.llama2-13b-chat-v1"; - - BedrockRuntimeClient client = BedrockRuntimeClient.builder() - .region(Region.US_EAST_1) - .credentialsProvider(ProfileCredentialsProvider.create()) - .build(); - - String payload = new JSONObject() - .put("prompt", prompt) - .put("max_gen_len", 512) - .put("temperature", 0.5) - .put("top_p", 0.9) - .toString(); - - InvokeModelRequest request = InvokeModelRequest.builder() - .body(SdkBytes.fromUtf8String(payload)) - .modelId(llama2ModelId) - .contentType("application/json") - .accept("application/json") - .build(); - - InvokeModelResponse response = client.invokeModel(request); - - JSONObject responseBody = new JSONObject(response.body().asUtf8String()); - - String generatedText = responseBody.getString("generation"); - - return generatedText; - } - // snippet-end:[bedrock-runtime.java2.invoke_llama2.main] - // snippet-start:[bedrock-runtime.java2.invoke_stable_diffusion.main] /** * Invokes the Stability.ai Stable Diffusion XL model to create an image based diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelAsync.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelAsync.java index ca80b8a8d64..ef4361ee850 100644 --- a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelAsync.java +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelAsync.java @@ -265,67 +265,6 @@ public static String invokeJurassic2(String prompt) { } // snippet-end:[bedrock-runtime.java2.invoke_jurassic-2_async.main] - // snippet-start:[bedrock-runtime.java2.invoke_llama2_async.main] - /** - * Asynchronously invokes the Meta Llama 2 Chat model to run an inference based - * on the provided input. - * - * @param prompt The prompt that you want Llama 2 to complete. - * @return The inference response generated by the model. - */ - public static String invokeLlama2(String prompt) { - /* - * The different model providers have individual request and response formats. - * For the format, ranges, and default values for Meta Llama 2 Chat, refer to: - * https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-meta. 
- * html - */ - - String llama2ModelId = "meta.llama2-13b-chat-v1"; - - BedrockRuntimeAsyncClient client = BedrockRuntimeAsyncClient.builder() - .region(Region.US_EAST_1) - .credentialsProvider(ProfileCredentialsProvider.create()) - .build(); - - String payload = new JSONObject() - .put("prompt", prompt) - .put("max_gen_len", 512) - .put("temperature", 0.5) - .put("top_p", 0.9) - .toString(); - - InvokeModelRequest request = InvokeModelRequest.builder() - .body(SdkBytes.fromUtf8String(payload)) - .modelId(llama2ModelId) - .contentType("application/json") - .accept("application/json") - .build(); - - CompletableFuture completableFuture = client.invokeModel(request) - .whenComplete((response, exception) -> { - if (exception != null) { - System.out.println("Model invocation failed: " + exception); - } - }); - - String generatedText = ""; - try { - InvokeModelResponse response = completableFuture.get(); - JSONObject responseBody = new JSONObject(response.body().asUtf8String()); - generatedText = responseBody.getString("generation"); - - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - System.err.println(e.getMessage()); - } catch (ExecutionException e) { - System.err.println(e.getMessage()); - } - - return generatedText; - } - // snippet-end:[bedrock-runtime.java2.invoke_llama2_async.main] - // snippet-start:[bedrock-runtime.java2.invoke_stable_diffusion_async.main] /** * Asynchronously invokes the Stability.ai Stable Diffusion XL model to create diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelWithResponseStream.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelWithResponseStream.java index abe422469d3..7da4489b347 100644 --- a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelWithResponseStream.java +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelWithResponseStream.java @@ -4,6 +4,8 @@ package com.example.bedrockruntime; // snippet-start:[bedrock-runtime.java2.invoke_model_with_response_stream.import] + +import org.json.JSONArray; import org.json.JSONObject; import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider; import software.amazon.awssdk.core.SdkBytes; @@ -12,6 +14,7 @@ import software.amazon.awssdk.services.bedrockruntime.model.InvokeModelWithResponseStreamRequest; import software.amazon.awssdk.services.bedrockruntime.model.InvokeModelWithResponseStreamResponseHandler; +import java.util.Iterator; import java.util.concurrent.atomic.AtomicReference; // snippet-end:[bedrock-runtime.java2.invoke_model_with_response_stream.import] diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelQuickstart.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelQuickstart.java new file mode 100644 index 00000000000..174538d845f --- /dev/null +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelQuickstart.java @@ -0,0 +1,55 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +package com.example.bedrockruntime.models.meta.llama2; + +import org.json.JSONObject; +import software.amazon.awssdk.core.SdkBytes; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.bedrockruntime.BedrockRuntimeClient; + +// snippet-start:[bedrock-runtime.java2.InvokeModel_Llama2_Quickstart] +// Send a prompt to Meta Llama 2 and print the response. +public class InvokeModelQuickstart { + + public static void main(String[] args) { + + // Create a Bedrock Runtime client in the AWS Region of your choice. + var client = BedrockRuntimeClient.builder() + .region(Region.US_WEST_2) + .build(); + + // Set the model ID, e.g., Llama 2 Chat 13B. + var modelId = "meta.llama2-13b-chat-v1"; + + // Define the user message to send. + var userMessage = "Describe the purpose of a 'hello world' program in one line."; + + // Embed the message in Llama 2's prompt format. + var prompt = "[INST] " + userMessage + " [/INST]"; + + // Create a JSON payload using the model's native structure. + var request = new JSONObject() + .put("prompt", prompt) + // Optional inference parameters: + .put("max_gen_len", 512) + .put("temperature", 0.5F) + .put("top_p", 0.9F); + + // Encode and send the request. + var response = client.invokeModel(req -> req + .body(SdkBytes.fromUtf8String(request.toString())) + .modelId(modelId)); + + // Decode the native response body. + var nativeResponse = new JSONObject(response.body().asUtf8String()); + + // Extract and print the response text. + var responseText = nativeResponse.getString("generation"); + System.out.println(responseText); + } +} +// Learn more about the Llama 2 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2 + +// snippet-end:[bedrock-runtime.java2.InvokeModel_Llama2_Quickstart] diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelWithResponseStreamQuickstart.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelWithResponseStreamQuickstart.java new file mode 100644 index 00000000000..e2ea5511f18 --- /dev/null +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelWithResponseStreamQuickstart.java @@ -0,0 +1,62 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.example.bedrockruntime.models.meta.llama2; + +import org.json.JSONObject; +import software.amazon.awssdk.core.SdkBytes; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.bedrockruntime.BedrockRuntimeAsyncClient; +import software.amazon.awssdk.services.bedrockruntime.model.InvokeModelWithResponseStreamResponseHandler; + +// snippet-start:[bedrock-runtime.java2.InvokeModelWithResponseStream_Llama2_Quickstart] +// Send a prompt to Meta Llama 2 and print the response stream in real-time. +public class InvokeModelWithResponseStreamQuickstart { + + public static void main(String[] args) { + + // Create a Bedrock Runtime client in the AWS Region of your choice. + var client = BedrockRuntimeAsyncClient.builder() + .region(Region.US_WEST_2) + .build(); + + // Set the model ID, e.g., Llama 2 Chat 13B. + var modelId = "meta.llama2-13b-chat-v1"; + + // Define the user message to send. 
+ var userMessage = "Describe the purpose of a 'hello world' program in one line."; + + // Embed the message in Llama 2's prompt format. + var prompt = "[INST] " + userMessage + " [/INST]"; + + // Create a JSON payload using the model's native structure. + var request = new JSONObject() + .put("prompt", prompt) + // Optional inference parameters: + .put("max_gen_len", 512) + .put("temperature", 0.5F) + .put("top_p", 0.9F); + + // Create a handler to extract and print the response text in real-time. + var streamHandler = InvokeModelWithResponseStreamResponseHandler.builder() + .subscriber(event -> event.accept( + InvokeModelWithResponseStreamResponseHandler.Visitor.builder() + .onChunk(c -> { + var chunk = new JSONObject(c.bytes().asUtf8String()); + if (chunk.has("generation")) { + System.out.print(chunk.getString("generation")); + } + }).build()) + ).build(); + + // Encode and send the request. Let the stream handler process the response. + client.invokeModelWithResponseStream(req -> req + .body(SdkBytes.fromUtf8String(request.toString())) + .modelId(modelId), streamHandler + ).join(); + } +} +// Learn more about the Llama 2 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2 + +// snippet-end:[bedrock-runtime.java2.InvokeModelWithResponseStream_Llama2_Quickstart] diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelQuickstart.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelQuickstart.java new file mode 100644 index 00000000000..1652157e14b --- /dev/null +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelQuickstart.java @@ -0,0 +1,63 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.example.bedrockruntime.models.meta.llama3; + +import org.json.JSONObject; +import software.amazon.awssdk.core.SdkBytes; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.bedrockruntime.BedrockRuntimeClient; + +import java.text.MessageFormat; + +// snippet-start:[bedrock-runtime.java2.InvokeModel_Llama3_Quickstart] +// Send a prompt to Meta Llama 3 and print the response. +public class InvokeModelQuickstart { + + public static void main(String[] args) { + + // Create a Bedrock Runtime client in the AWS Region of your choice. + var client = BedrockRuntimeClient.builder() + .region(Region.US_WEST_2) + .build(); + + // Set the model ID, e.g., Llama 3 8B Instruct. + var modelId = "meta.llama3-8b-instruct-v1:0"; + + // Define the user message to send. + var userMessage = "Describe the purpose of a 'hello world' program in one line."; + + // Embed the message in Llama 3's prompt format. + var prompt = MessageFormat.format(""" + <|begin_of_text|> + <|start_header_id|>user<|end_header_id|> + {0} + <|eot_id|> + <|start_header_id|>assistant<|end_header_id|> + """, userMessage); + + // Create a JSON payload using the model's native structure. + var request = new JSONObject() + .put("prompt", prompt) + // Optional inference parameters: + .put("max_gen_len", 512) + .put("temperature", 0.5F) + .put("top_p", 0.9F); + + // Encode and send the request. + var response = client.invokeModel(req -> req + .body(SdkBytes.fromUtf8String(request.toString())) + .modelId(modelId)); + + // Decode the native response body. 
+ var nativeResponse = new JSONObject(response.body().asUtf8String()); + + // Extract and print the response text. + var responseText = nativeResponse.getString("generation"); + System.out.println(responseText); + } +} +// Learn more about the Llama 3 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3 + +// snippet-end:[bedrock-runtime.java2.InvokeModel_Llama3_Quickstart] diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelWithResponseStreamQuickstart.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelWithResponseStreamQuickstart.java new file mode 100644 index 00000000000..845c8c434ef --- /dev/null +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelWithResponseStreamQuickstart.java @@ -0,0 +1,70 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.example.bedrockruntime.models.meta.llama3; + +import org.json.JSONObject; +import software.amazon.awssdk.core.SdkBytes; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.bedrockruntime.BedrockRuntimeAsyncClient; +import software.amazon.awssdk.services.bedrockruntime.model.InvokeModelWithResponseStreamResponseHandler; + +import java.text.MessageFormat; + +// snippet-start:[bedrock-runtime.java2.InvokeModelWithResponseStream_Llama3_Quickstart] +// Send a prompt to Meta Llama 3 and print the response stream in real-time. +public class InvokeModelWithResponseStreamQuickstart { + + public static void main(String[] args) { + + // Create a Bedrock Runtime client in the AWS Region of your choice. + var client = BedrockRuntimeAsyncClient.builder() + .region(Region.US_WEST_2) + .build(); + + // Set the model ID, e.g., Llama 3 8B Instruct. + var modelId = "meta.llama3-8b-instruct-v1:0"; + + // Define the user message to send. + var userMessage = "Describe the purpose of a 'hello world' program in one line."; + + // Embed the message in Llama 3's prompt format. + var prompt = MessageFormat.format(""" + <|begin_of_text|> + <|start_header_id|>user<|end_header_id|> + {0} + <|eot_id|> + <|start_header_id|>assistant<|end_header_id|> + """, userMessage); + + // Create a JSON payload using the model's native structure. + var request = new JSONObject() + .put("prompt", prompt) + // Optional inference parameters: + .put("max_gen_len", 512) + .put("temperature", 0.5F) + .put("top_p", 0.9F); + + // Create a handler to extract and print the response text in real-time. + var streamHandler = InvokeModelWithResponseStreamResponseHandler.builder() + .subscriber(event -> event.accept( + InvokeModelWithResponseStreamResponseHandler.Visitor.builder() + .onChunk(c -> { + var chunk = new JSONObject(c.bytes().asUtf8String()); + if (chunk.has("generation")) { + System.out.print(chunk.getString("generation")); + } + }).build()) + ).build(); + + // Encode and send the request. Let the stream handler process the response. 
+ client.invokeModelWithResponseStream(req -> req + .body(SdkBytes.fromUtf8String(request.toString())) + .modelId(modelId), streamHandler + ).join(); + } +} +// Learn more about the Llama 3 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3 + +// snippet-end:[bedrock-runtime.java2.InvokeModelWithResponseStream_Llama3_Quickstart] diff --git a/javav2/example_code/bedrock-runtime/src/test/java/TestLlama2.java b/javav2/example_code/bedrock-runtime/src/test/java/TestLlama2.java new file mode 100644 index 00000000000..17f965fe9a6 --- /dev/null +++ b/javav2/example_code/bedrock-runtime/src/test/java/TestLlama2.java @@ -0,0 +1,26 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import com.example.bedrockruntime.models.meta.llama2.InvokeModelQuickstart; +import com.example.bedrockruntime.models.meta.llama2.InvokeModelWithResponseStreamQuickstart; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +@Tag("IntegrationTest") +@TestInstance(TestInstance.Lifecycle.PER_METHOD) +public class TestLlama2 { + + @Test + void InvokeModel() { + assertDoesNotThrow(() -> InvokeModelQuickstart.main(null)); + } + + @Test + void InvokeModelWithResponseStream() { + assertDoesNotThrow(() -> InvokeModelWithResponseStreamQuickstart.main(null)); + } + +} diff --git a/javav2/example_code/bedrock-runtime/src/test/java/TestLlama3.java b/javav2/example_code/bedrock-runtime/src/test/java/TestLlama3.java new file mode 100644 index 00000000000..5277dcaf592 --- /dev/null +++ b/javav2/example_code/bedrock-runtime/src/test/java/TestLlama3.java @@ -0,0 +1,26 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import com.example.bedrockruntime.models.meta.llama3.InvokeModelQuickstart; +import com.example.bedrockruntime.models.meta.llama3.InvokeModelWithResponseStreamQuickstart; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +@Tag("IntegrationTest") +@TestInstance(TestInstance.Lifecycle.PER_METHOD) +public class TestLlama3 { + + @Test + void InvokeModel() { + assertDoesNotThrow(() -> InvokeModelQuickstart.main(null)); + } + + @Test + void InvokeModelWithResponseStream() { + assertDoesNotThrow(() -> InvokeModelWithResponseStreamQuickstart.main(null)); + } + +} diff --git a/javav2/example_code/bedrock-runtime/src/test/java/TextToTextAsyncTest.java b/javav2/example_code/bedrock-runtime/src/test/java/TextToTextAsyncTest.java index 652689c227a..91ba1db87ff 100644 --- a/javav2/example_code/bedrock-runtime/src/test/java/TextToTextAsyncTest.java +++ b/javav2/example_code/bedrock-runtime/src/test/java/TextToTextAsyncTest.java @@ -58,15 +58,6 @@ void InvokeJurassic2Async() { System.out.println("Test async invoke Jurassic-2 passed."); } - @Test - @Tag("IntegrationTest") - void InvokeLlama2Async() { - var prompt = "In one sentence, what is a large-language model?"; - var generatedText = InvokeModelAsync.invokeLlama2(prompt); - assertNotNullOrEmpty(generatedText); - System.out.println("Test async invoke Llama 2 passed."); - } - @Test @Tag("IntegrationTest") void InvokeClaude3WithResponseStream() { diff --git a/javav2/example_code/bedrock-runtime/src/test/java/TextToTextSyncTest.java b/javav2/example_code/bedrock-runtime/src/test/java/TextToTextSyncTest.java index 396cb7fd565..3d70c9b7dc7 100644 --- a/javav2/example_code/bedrock-runtime/src/test/java/TextToTextSyncTest.java +++ b/javav2/example_code/bedrock-runtime/src/test/java/TextToTextSyncTest.java @@ -53,12 +53,4 @@ void InvokeJurassic2() { assertNotNullOrEmpty(completion); System.out.println("Test sync invoke Jurassic-2 passed."); } - - @Test - @Tag("IntegrationTest") - void InvokeLlama2() { - String completion = InvokeModel.invokeLlama2("In one sentence, what is a large-language model?"); - assertNotNullOrEmpty(completion); - System.out.println("Test sync invoke Llama 2 passed."); - } } diff --git a/python/example_code/bedrock-runtime/README.md b/python/example_code/bedrock-runtime/README.md index 6feee8bc3af..2d3fd43ff1b 100644 --- a/python/example_code/bedrock-runtime/README.md +++ b/python/example_code/bedrock-runtime/README.md @@ -39,15 +39,18 @@ python -m pip install -r requirements.txt ### Invoke model examples - [AI21 Labs Jurassic-2: Text generation](bedrock_runtime_wrapper.py#L79) -- [Amazon Titan: Image generation](bedrock_runtime_wrapper.py#L275) -- [Anthropic Claude 2: Real-time response stream processing](bedrock_runtime_wrapper.py#L320) +- [Amazon Titan: Image generation](bedrock_runtime_wrapper.py#L238) +- [Anthropic Claude 2: Real-time response stream processing](bedrock_runtime_wrapper.py#L283) - [Anthropic Claude 2: Text generation](bedrock_runtime_wrapper.py#L39) - [Anthropic Claude 3: Multimodal invocation](models/anthropic/claude_3.py#L94) - [Anthropic Claude 3: Text generation](models/anthropic/claude_3.py#L33) -- [Meta Llama 2: Text generation](bedrock_runtime_wrapper.py#L115) -- [Mistral AI: Text generation with Mistral 7B Instruct](bedrock_runtime_wrapper.py#L152) -- [Mistral AI: Text generation with Mixtral 8x7B 
Instruct](bedrock_runtime_wrapper.py#L192) -- [Stable Diffusion: Image generation](bedrock_runtime_wrapper.py#L232) +- [Meta Llama 2: Text generation](models/meta/llama2/invoke_model_quickstart.py#L4) +- [Meta Llama 2: Text generation with response stream](models/meta/llama2/invoke_model_with_response_stream_quickstart.py#L4) +- [Meta Llama 3: Text generation](models/meta/llama3/invoke_model_quickstart.py#L4) +- [Meta Llama 3: Text generation with response stream](models/meta/llama3/invoke_model_with_response_stream_quickstart.py#L4) +- [Mistral AI: Text generation with Mistral 7B Instruct](bedrock_runtime_wrapper.py#L115) +- [Mistral AI: Text generation with Mixtral 8x7B Instruct](bedrock_runtime_wrapper.py#L155) +- [Stable Diffusion: Image generation](bedrock_runtime_wrapper.py#L195) diff --git a/python/example_code/bedrock-runtime/bedrock_runtime_wrapper.py b/python/example_code/bedrock-runtime/bedrock_runtime_wrapper.py index f1ce10bcf32..abfea8f5f71 100644 --- a/python/example_code/bedrock-runtime/bedrock_runtime_wrapper.py +++ b/python/example_code/bedrock-runtime/bedrock_runtime_wrapper.py @@ -112,43 +112,6 @@ def invoke_jurassic2(self, prompt): # snippet-end:[python.example_code.bedrock-runtime.InvokeAi21Jurassic2] - # snippet-start:[python.example_code.bedrock-runtime.InvokeMetaLlama2] - def invoke_llama2(self, prompt): - """ - Invokes the Meta Llama 2 large-language model to run an inference - using the input provided in the request body. - - :param prompt: The prompt that you want Llama 2 to complete. - :return: Inference response from the model. - """ - - try: - # The different model providers have individual request and response formats. - # For the format, ranges, and default values for Meta Llama 2 Chat, refer to: - # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-meta.html - - body = { - "prompt": prompt, - "temperature": 0.5, - "top_p": 0.9, - "max_gen_len": 512, - } - - response = self.bedrock_runtime_client.invoke_model( - modelId="meta.llama2-13b-chat-v1", body=json.dumps(body) - ) - - response_body = json.loads(response["body"].read()) - completion = response_body["generation"] - - return completion - - except ClientError: - logger.error("Couldn't invoke Llama 2") - raise - - # snippet-end:[python.example_code.bedrock-runtime.InvokeMetaLlama2] - # snippet-start:[python.example_code.bedrock-runtime.InvokeMistral7B] def invoke_mistral_7b(self, prompt): """ diff --git a/python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.py b/python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.py new file mode 100644 index 00000000000..7b5c2bc1d65 --- /dev/null +++ b/python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.py @@ -0,0 +1,44 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +# snippet-start:[python.example_code.bedrock-runtime.InvokeModel_Llama2_Quickstart] +# Send a prompt to Meta Llama 2 and print the response. + +import boto3 +import json + +# Create a Bedrock Runtime client in the AWS Region of your choice. +client = boto3.client("bedrock-runtime", region_name="us-west-2") + +# Set the model ID, e.g., Llama 2 Chat 13B. +model_id = "meta.llama2-13b-chat-v1" + +# Define the user message to send. +user_message = "Describe the purpose of a 'hello world' program in one line." + +# Embed the message in Llama 2's prompt format. 
+prompt = f"[INST] {user_message} [/INST]" + +# Format the request payload using the model's native structure. +request = { + "prompt": prompt, + # Optional inference parameters: + "max_gen_len": 512, + "temperature": 0.5, + "top_p": 0.9, +} + +# Encode and send the request. +response = client.invoke_model(body=json.dumps(request), modelId=model_id) + +# Decode the native response body. +model_response = json.loads(response["body"].read()) + +# Extract and print the generated text. +response_text = model_response["generation"] +print(response_text) + +# Learn more about the Llama 2 prompt format at: +# https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2 + +# snippet-end:[python.example_code.bedrock-runtime.InvokeModel_Llama2_Quickstart] diff --git a/python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.py b/python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.py new file mode 100644 index 00000000000..92961a3d895 --- /dev/null +++ b/python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.py @@ -0,0 +1,46 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +# snippet-start:[python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart] +# Send a prompt to Meta Llama 2 and print the response stream in real-time. + +import boto3 +import json + +# Create a Bedrock Runtime client in the AWS Region of your choice. +client = boto3.client("bedrock-runtime", region_name="us-west-2") + +# Set the model ID, e.g., Llama 2 Chat 13B. +model_id = "meta.llama2-13b-chat-v1" + +# Define the user message to send. +user_message = "Describe the purpose of a 'hello world' program in one line." + +# Embed the message in Llama 2's prompt format. +prompt = f"[INST] {user_message} [/INST]" + +# Format the request payload using the model's native structure. +request = { + "prompt": prompt, + # Optional inference parameters: + "max_gen_len": 512, + "temperature": 0.5, + "top_p": 0.9, +} + +# Encode and send the request. +response_stream = client.invoke_model_with_response_stream( + body=json.dumps(request), + modelId=model_id, +) + +# Extract and print the response text in real-time. +for event in response_stream["body"]: + chunk = json.loads(event["chunk"]["bytes"]) + if "generation" in chunk: + print(chunk["generation"], end="") + +# Learn more about the Llama 2 prompt format at: +# https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2 + +# snippet-end:[python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart] diff --git a/python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.py b/python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.py new file mode 100644 index 00000000000..9aeceb38549 --- /dev/null +++ b/python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.py @@ -0,0 +1,50 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +# snippet-start:[python.example_code.bedrock-runtime.InvokeModel_Llama3_Quickstart] +# Send a prompt to Meta Llama 3 and print the response. + +import boto3 +import json + +# Create a Bedrock Runtime client in the AWS Region of your choice. +client = boto3.client("bedrock-runtime", region_name="us-west-2") + +# Set the model ID, e.g., Llama 3 8B Instruct. 
+model_id = "meta.llama3-8b-instruct-v1:0" + +# Define the user message to send. +user_message = "Describe the purpose of a 'hello world' program in one line." + +# Embed the message in Llama 3's prompt format. +prompt = f""" +<|begin_of_text|> +<|start_header_id|>user<|end_header_id|> +{user_message} +<|eot_id|> +<|start_header_id|>assistant<|end_header_id|> +""" + +# Format the request payload using the model's native structure. +request = { + "prompt": prompt, + # Optional inference parameters: + "max_gen_len": 512, + "temperature": 0.5, + "top_p": 0.9, +} + +# Encode and send the request. +response = client.invoke_model(body=json.dumps(request), modelId=model_id) + +# Decode the native response body. +model_response = json.loads(response["body"].read()) + +# Extract and print the generated text. +response_text = model_response["generation"] +print(response_text) + +# Learn more about the Llama 3 prompt format in the documentation: +# https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3 + +# snippet-end:[python.example_code.bedrock-runtime.InvokeModel_Llama3_Quickstart] diff --git a/python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.py b/python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.py new file mode 100644 index 00000000000..c86ad3a422f --- /dev/null +++ b/python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.py @@ -0,0 +1,52 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +# snippet-start:[python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart] +# Send a prompt to Meta Llama 3 and print the response stream in real-time. + +import boto3 +import json + +# Create a Bedrock Runtime client in the AWS Region of your choice. +client = boto3.client("bedrock-runtime", region_name="us-west-2") + +# Set the model ID, e.g., Llama 3 8B Instruct. +model_id = "meta.llama3-8b-instruct-v1:0" + +# Define the user message to send. +user_message = "Describe the purpose of a 'hello world' program in one line." + +# Embed the message in Llama 3's prompt format. +prompt = f""" +<|begin_of_text|> +<|start_header_id|>user<|end_header_id|> +{user_message} +<|eot_id|> +<|start_header_id|>assistant<|end_header_id|> +""" + +# Format the request payload using the model's native structure. +request = { + "prompt": prompt, + # Optional inference parameters: + "max_gen_len": 512, + "temperature": 0.5, + "top_p": 0.9, +} + +# Encode and send the request. +response_stream = client.invoke_model_with_response_stream( + body=json.dumps(request), + modelId=model_id, +) + +# Extract and print the response text in real-time. 
+for event in response_stream["body"]:
+    chunk = json.loads(event["chunk"]["bytes"])
+    if "generation" in chunk:
+        print(chunk["generation"], end="")
+
+# Learn more about the Llama 3 prompt format at:
+# https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3
+
+# snippet-end:[python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart]
diff --git a/python/example_code/bedrock-runtime/test/models/meta/test_invoke_model_quickstarts.py b/python/example_code/bedrock-runtime/test/models/meta/test_invoke_model_quickstarts.py
new file mode 100644
index 00000000000..966e001f706
--- /dev/null
+++ b/python/example_code/bedrock-runtime/test/models/meta/test_invoke_model_quickstarts.py
@@ -0,0 +1,62 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Integration tests for:
+- models/meta/llama2/invoke_model_quickstart.py
+- models/meta/llama3/invoke_model_quickstart.py
+"""
+
+import pytest
+import subprocess
+import sys
+
+
+@pytest.mark.integ
+def test_llama2_quickstart():
+    result = subprocess.run(
+        [sys.executable, "models/meta/llama2/invoke_model_quickstart.py"],
+        capture_output=True,
+        text=True,
+    )
+    assert result.stdout != ""
+    assert result.returncode == 0
+
+
+@pytest.mark.integ
+def test_llama2_with_response_stream_quickstart():
+    result = subprocess.run(
+        [
+            sys.executable,
+            "models/meta/llama2/invoke_model_with_response_stream_quickstart.py",
+        ],
+        capture_output=True,
+        text=True,
+    )
+    assert result.stdout != ""
+    assert result.returncode == 0
+
+
+@pytest.mark.integ
+def test_llama3_quickstart():
+    result = subprocess.run(
+        [sys.executable, "models/meta/llama3/invoke_model_quickstart.py"],
+        capture_output=True,
+        text=True,
+    )
+    assert result.stdout != ""
+    assert result.returncode == 0
+
+
+@pytest.mark.integ
+def test_llama3_with_response_stream_quickstart():
+    result = subprocess.run(
+        [
+            sys.executable,
+            "models/meta/llama3/invoke_model_with_response_stream_quickstart.py",
+        ],
+        capture_output=True,
+        text=True,
+    )
+    assert result.stdout != ""
+    assert result.returncode == 0
diff --git a/python/example_code/bedrock-runtime/test/test_bedrock_runtime_wrapper.py b/python/example_code/bedrock-runtime/test/test_bedrock_runtime_wrapper.py
index 171e021d6ea..2be67b0547e 100644
--- a/python/example_code/bedrock-runtime/test/test_bedrock_runtime_wrapper.py
+++ b/python/example_code/bedrock-runtime/test/test_bedrock_runtime_wrapper.py
@@ -98,27 +98,6 @@ def test_invoke_jurassic2(make_stubber, error_code):
         assert exc_info.value.response["Error"]["Code"] == error_code
 
 
-@pytest.mark.parametrize("error_code", [None, "ClientError"])
-def test_invoke_llama2(make_stubber, error_code):
-    bedrock_runtime = boto3.client(
-        service_name="bedrock-runtime", region_name="us-east-1"
-    )
-    bedrock_runtime_stubber = make_stubber(bedrock_runtime)
-    wrapper = BedrockRuntimeWrapper(bedrock_runtime)
-
-    prompt = "Hey, how are you?"
-
-    bedrock_runtime_stubber.stub_invoke_llama2(prompt, error_code=error_code)
-
-    if error_code is None:
-        got_completion = wrapper.invoke_llama2(prompt)
-        assert len(got_completion) > 0
-    else:
-        with pytest.raises(ClientError) as exc_info:
-            wrapper.invoke_llama2(prompt)
-        assert exc_info.value.response["Error"]["Code"] == error_code
-
-
 @pytest.mark.asyncio
 @pytest.mark.parametrize("error_code", ["ClientError"])
 async def test_invoke_model_with_response_stream(make_stubber, error_code):
diff --git a/python/test_tools/bedrock_runtime_stubber.py b/python/test_tools/bedrock_runtime_stubber.py
index ed2af9a51d9..6ac95919308 100644
--- a/python/test_tools/bedrock_runtime_stubber.py
+++ b/python/test_tools/bedrock_runtime_stubber.py
@@ -118,24 +118,6 @@ def stub_invoke_jurassic2(self, prompt, error_code=None):
             "invoke_model", expected_params, response, error_code=error_code
         )
 
-    def stub_invoke_llama2(self, prompt, error_code=None):
-        expected_params = {
-            "modelId": "meta.llama2-13b-chat-v1",
-            "body": json.dumps(
-                {"prompt": prompt, "temperature": 0.5, "top_p": 0.9, "max_gen_len": 512}
-            ),
-        }
-
-        response_body = io.BytesIO(
-            json.dumps({"generation": "Fake completion response."}).encode("utf-8")
-        )
-
-        response = {"body": response_body, "contentType": ""}
-
-        self._stub_bifurcator(
-            "invoke_model", expected_params, response, error_code=error_code
-        )
-
     def stub_invoke_model_with_response_stream(self, prompt, error_code=None):
         expected_params = {
             "modelId": "anthropic.claude-v2",