
Commit

Python, Java, JavaScript: Bedrock getting started examples for Llama 2 and 3 (awsdocs#6375)

Co-authored-by: Dennis Traub <[email protected]>
2 people authored and DavidSouther committed Apr 30, 2024
1 parent 7592456 commit 62677d3
Showing 32 changed files with 969 additions and 333 deletions.
123 changes: 110 additions & 13 deletions .doc_gen/metadata/bedrock-runtime_metadata.yaml
@@ -519,10 +519,10 @@ bedrock-runtime_InvokeJurassic2:
  services:
    bedrock-runtime: {InvokeModel}

-bedrock-runtime_InvokeLlama2:
-  title: Invoke the Meta Llama 2 Chat model on &BR; for text generation
+bedrock-runtime_Llama2_InvokeLlama:
+  title: Invoke Meta Llama 2 on &BR; using Meta's native request and response payloads
  title_abbrev: "Meta Llama 2: Text generation"
-  synopsis: invoke the Meta Llama 2 Chat model on &BR; for text generation.
+  synopsis: get started sending prompts to Meta Llama 2 and printing the response.
  category: Invoke model examples
  languages:
    Go:
@@ -538,20 +538,17 @@ bedrock-runtime_InvokeLlama2:
        - sdk_version: 2
          github: javav2/example_code/bedrock-runtime
          excerpts:
-            - description: Asynchronously invoke the Meta Llama 2 Chat foundation model to generate text.
+            - description: Send your first prompt to Meta Llama 2.
              snippet_tags:
-                - bedrock-runtime.java2.invoke_llama2_async.main
-            - description: Invoke the Meta Llama 2 Chat foundation model to generate text.
-              snippet_tags:
-                - bedrock-runtime.java2.invoke_llama2.main
+                - bedrock-runtime.java2.InvokeModel_Llama2_Quickstart
    JavaScript:
      versions:
        - sdk_version: 3
          github: javascriptv3/example_code/bedrock-runtime
          excerpts:
-            - description: Invoke the Meta Llama 2 Chat foundation model to generate text.
-              snippet_files:
-                - javascriptv3/example_code/bedrock-runtime/models/meta_llama2/llama2_chat.js
+            - description: Send your first prompt to Meta Llama 2.
+              snippet_tags:
+                - javascript.v3.bedrock-runtime.InvokeModel_Llama2_Quickstart
    PHP:
      versions:
        - sdk_version: 3
@@ -565,9 +562,9 @@ bedrock-runtime_InvokeLlama2:
        - sdk_version: 3
          github: python/example_code/bedrock-runtime
          excerpts:
-            - description: Invoke the Meta Llama 2 Chat foundation model to generate text.
+            - description: Send your first prompt to Meta Llama 2.
              snippet_tags:
-                - python.example_code.bedrock-runtime.InvokeMetaLlama2
+                - python.example_code.bedrock-runtime.InvokeModel_Llama2_Quickstart
    .NET:
      versions:
        - sdk_version: 3
@@ -579,6 +576,106 @@ bedrock-runtime_InvokeLlama2:
  services:
    bedrock-runtime: {InvokeModel}

+bedrock-runtime_Llama2_InvokeModelWithResponseStream:
+  title: Invoke Meta Llama 2 on &BR; using Meta's native request and response payloads with a response stream
+  title_abbrev: "Meta Llama 2: Text generation with response stream"
+  synopsis: get started sending prompts to Meta Llama 2 and printing the response stream in real time.
+  category: Invoke model examples
+  languages:
+    Java:
+      versions:
+        - sdk_version: 2
+          github: javav2/example_code/bedrock-runtime
+          excerpts:
+            - description: Send your first prompt to Meta Llama 2.
+              snippet_tags:
+                - bedrock-runtime.java2.InvokeModelWithResponseStream_Llama2_Quickstart
+    JavaScript:
+      versions:
+        - sdk_version: 3
+          github: javascriptv3/example_code/bedrock-runtime
+          excerpts:
+            - description: Send your first prompt to Meta Llama 2.
+              snippet_tags:
+                - javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart
+    Python:
+      versions:
+        - sdk_version: 3
+          github: python/example_code/bedrock-runtime
+          excerpts:
+            - description: Send your first prompt to Meta Llama 2.
+              snippet_tags:
+                - python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart
+  services:
+    bedrock-runtime: {InvokeModelWithResponseStream}
+
+
+bedrock-runtime_Llama3_InvokeLlama:
+  title: Invoke Meta Llama 3 on &BR; using Meta's native request and response payloads
+  title_abbrev: "Meta Llama 3: Text generation"
+  synopsis: get started sending prompts to Meta Llama 3 and printing the response.
+  category: Invoke model examples
+  languages:
+    Java:
+      versions:
+        - sdk_version: 2
+          github: javav2/example_code/bedrock-runtime
+          excerpts:
+            - description: Send your first prompt to Meta Llama 3.
+              snippet_tags:
+                - bedrock-runtime.java2.InvokeModel_Llama3_Quickstart
+    JavaScript:
+      versions:
+        - sdk_version: 3
+          github: javascriptv3/example_code/bedrock-runtime
+          excerpts:
+            - description: Send your first prompt to Meta Llama 3.
+              snippet_tags:
+                - javascript.v3.bedrock-runtime.InvokeModel_Llama3_Quickstart
+    Python:
+      versions:
+        - sdk_version: 3
+          github: python/example_code/bedrock-runtime
+          excerpts:
+            - description: Send your first prompt to Meta Llama 3.
+              snippet_tags:
+                - python.example_code.bedrock-runtime.InvokeModel_Llama3_Quickstart
+  services:
+    bedrock-runtime: {InvokeModel}
+
+bedrock-runtime_Llama3_InvokeModelWithResponseStream:
+  title: Invoke Meta Llama 3 on &BR; using Meta's native request and response payloads with a response stream
+  title_abbrev: "Meta Llama 3: Text generation with response stream"
+  synopsis: get started sending prompts to Meta Llama 3 and printing the response stream in real time.
+  category: Invoke model examples
+  languages:
+    Java:
+      versions:
+        - sdk_version: 2
+          github: javav2/example_code/bedrock-runtime
+          excerpts:
+            - description: Send your first prompt to Meta Llama 3.
+              snippet_tags:
+                - bedrock-runtime.java2.InvokeModelWithResponseStream_Llama3_Quickstart
+    JavaScript:
+      versions:
+        - sdk_version: 3
+          github: javascriptv3/example_code/bedrock-runtime
+          excerpts:
+            - description: Send your first prompt to Meta Llama 3.
+              snippet_tags:
+                - javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart
+    Python:
+      versions:
+        - sdk_version: 3
+          github: python/example_code/bedrock-runtime
+          excerpts:
+            - description: Send your first prompt to Meta Llama 3.
+              snippet_tags:
+                - python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart
+  services:
+    bedrock-runtime: {InvokeModelWithResponseStream}
+
bedrock-runtime_Scenario_InvokeModels:
  title: Invoke various foundation models on &BR;
  title_abbrev: Invoke multiple foundation models on &BR;
5 changes: 4 additions & 1 deletion javascriptv3/example_code/bedrock-runtime/README.md
@@ -51,7 +51,10 @@ functions within the same service.
- [Anthropic Claude 2: Text generation](models/anthropic_claude/claude_2.js)
- [Anthropic Claude 3: Text generation](models/anthropic_claude/claude_3.js)
- [Anthropic Claude Instant: Text generation](models/anthropic_claude/claude_instant_1.js)
-- [Meta Llama 2: Text generation](models/meta_llama2/llama2_chat.js)
+- [Meta Llama 2: Text generation](models/meta/llama2/invoke_model_quickstart.js#L4)
+- [Meta Llama 2: Text generation with response stream](models/meta/llama2/invoke_model_with_response_stream_quickstart.js#L4)
+- [Meta Llama 3: Text generation](models/meta/llama3/invoke_model_quickstart.js#L4)
+- [Meta Llama 3: Text generation with response stream](models/meta/llama3/invoke_model_with_response_stream_quickstart.js#L4)
- [Mistral AI: Text generation with Mistral 7B Instruct](models/mistral_ai/mistral_7b.js)
- [Mistral AI: Text generation with Mixtral 8x7B Instruct](models/mistral_ai/mixtral_8x7b.js)

@@ -50,18 +50,6 @@ export const FoundationModels = Object.freeze({
    module: () => import("../models/ai21_labs_jurassic2/jurassic2.js"),
    invoker: (/** @type {Module} */ module) => module.invokeModel,
  },
-  LLAMA2_CHAT_13B: {
-    modelId: "meta.llama2-13b-chat-v1",
-    modelName: "Llama 2 Chat 13B",
-    module: () => import("../models/meta_llama2/llama2_chat.js"),
-    invoker: (/** @type {Module} */ module) => module.invokeModel,
-  },
-  LLAMA2_CHAT_70B: {
-    modelId: "meta.llama2-70b-chat-v1",
-    modelName: "Llama 2 Chat 70B",
-    module: () => import("../models/meta_llama2/llama2_chat.js"),
-    invoker: (/** @type {Module} */ module) => module.invokeModel,
-  },
  MISTRAL_7B: {
    modelId: "mistral.mistral-7b-instruct-v0:2",
    modelName: "Mistral 7B Instruct",
54 changes: 54 additions & 0 deletions javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.js
@@ -0,0 +1,54 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// snippet-start:[javascript.v3.bedrock-runtime.InvokeModel_Llama2_Quickstart]
// Send a prompt to Meta Llama 2 and print the response.

import {
  BedrockRuntimeClient,
  InvokeModelCommand,
} from "@aws-sdk/client-bedrock-runtime";

// Create a Bedrock Runtime client in the AWS Region of your choice.
const client = new BedrockRuntimeClient({ region: "us-west-2" });

// Set the model ID, e.g., Llama 2 Chat 13B.
const modelId = "meta.llama2-13b-chat-v1";

// Define the user message to send.
const userMessage =
  "Describe the purpose of a 'hello world' program in one sentence.";

// Embed the message in Llama 2's prompt format.
const prompt = `<s>[INST] ${userMessage} [/INST]`;

// Format the request payload using the model's native structure.
const request = {
  prompt,
  // Optional inference parameters:
  max_gen_len: 512,
  temperature: 0.5,
  top_p: 0.9,
};

// Encode and send the request.
const response = await client.send(
  new InvokeModelCommand({
    contentType: "application/json",
    body: JSON.stringify(request),
    modelId,
  }),
);

// Decode the native response body.
/** @type {{ generation: string }} */
const nativeResponse = JSON.parse(new TextDecoder().decode(response.body));

// Extract and print the generated text.
const responseText = nativeResponse.generation;
console.log(responseText);

// Learn more about the Llama 2 prompt format at:
// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2

// snippet-end:[javascript.v3.bedrock-runtime.InvokeModel_Llama2_Quickstart]
54 changes: 54 additions & 0 deletions javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.js
@@ -0,0 +1,54 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// snippet-start:[javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart]
// Send a prompt to Meta Llama 2 and print the response stream in real time.

import {
  BedrockRuntimeClient,
  InvokeModelWithResponseStreamCommand,
} from "@aws-sdk/client-bedrock-runtime";

// Create a Bedrock Runtime client in the AWS Region of your choice.
const client = new BedrockRuntimeClient({ region: "us-west-2" });

// Set the model ID, e.g., Llama 2 Chat 13B.
const modelId = "meta.llama2-13b-chat-v1";

// Define the user message to send.
const userMessage =
  "Describe the purpose of a 'hello world' program in one sentence.";

// Embed the message in Llama 2's prompt format.
const prompt = `<s>[INST] ${userMessage} [/INST]`;

// Format the request payload using the model's native structure.
const request = {
  prompt,
  // Optional inference parameters:
  max_gen_len: 512,
  temperature: 0.5,
  top_p: 0.9,
};

// Encode and send the request.
const responseStream = await client.send(
  new InvokeModelWithResponseStreamCommand({
    contentType: "application/json",
    body: JSON.stringify(request),
    modelId,
  }),
);

// Extract and print the response stream in real time.
for await (const event of responseStream.body) {
  /** @type {{ generation: string }} */
  const chunk = JSON.parse(new TextDecoder().decode(event.chunk.bytes));
  if (chunk.generation) {
    process.stdout.write(chunk.generation);
  }
}

// Learn more about the Llama 2 prompt format at:
// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2
// snippet-end:[javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart]
59 changes: 59 additions & 0 deletions javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.js
@@ -0,0 +1,59 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// snippet-start:[javascript.v3.bedrock-runtime.InvokeModel_Llama3_Quickstart]
// Send a prompt to Meta Llama 3 and print the response.

import {
  BedrockRuntimeClient,
  InvokeModelCommand,
} from "@aws-sdk/client-bedrock-runtime";

// Create a Bedrock Runtime client in the AWS Region of your choice.
const client = new BedrockRuntimeClient({ region: "us-west-2" });

// Set the model ID, e.g., Llama 3 8B Instruct.
const modelId = "meta.llama3-8b-instruct-v1:0";

// Define the user message to send.
const userMessage =
  "Describe the purpose of a 'hello world' program in one sentence.";

// Embed the message in Llama 3's prompt format.
const prompt = `
<|begin_of_text|>
<|start_header_id|>user<|end_header_id|>
${userMessage}
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
`;

// Format the request payload using the model's native structure.
const request = {
  prompt,
  // Optional inference parameters:
  max_gen_len: 512,
  temperature: 0.5,
  top_p: 0.9,
};

// Encode and send the request.
const response = await client.send(
  new InvokeModelCommand({
    contentType: "application/json",
    body: JSON.stringify(request),
    modelId,
  }),
);

// Decode the native response body.
/** @type {{ generation: string }} */
const nativeResponse = JSON.parse(new TextDecoder().decode(response.body));

// Extract and print the generated text.
const responseText = nativeResponse.generation;
console.log(responseText);

// Learn more about the Llama 3 prompt format at:
// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3
// snippet-end:[javascript.v3.bedrock-runtime.InvokeModel_Llama3_Quickstart]
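
The remaining files in this commit, including the Llama 3 response-stream quickstart listed in the README above (models/meta/llama3/invoke_model_with_response_stream_quickstart.js, registered in the metadata under the snippet tag javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart), are not rendered in this view. Below is a minimal sketch of what that file plausibly contains, combining the Llama 3 prompt format and model ID from the file above with the streaming pattern from the Llama 2 example; treat the details as assumptions rather than the committed code.

// Send a prompt to Meta Llama 3 and print the response stream in real time.

import {
  BedrockRuntimeClient,
  InvokeModelWithResponseStreamCommand,
} from "@aws-sdk/client-bedrock-runtime";

// Create a Bedrock Runtime client in the AWS Region of your choice.
const client = new BedrockRuntimeClient({ region: "us-west-2" });

// Set the model ID, e.g., Llama 3 8B Instruct (assumed; carried over from the invoke example above).
const modelId = "meta.llama3-8b-instruct-v1:0";

// Define the user message to send.
const userMessage =
  "Describe the purpose of a 'hello world' program in one sentence.";

// Embed the message in Llama 3's prompt format.
const prompt = `
<|begin_of_text|>
<|start_header_id|>user<|end_header_id|>
${userMessage}
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
`;

// Format the request payload using the model's native structure.
const request = {
  prompt,
  // Optional inference parameters:
  max_gen_len: 512,
  temperature: 0.5,
  top_p: 0.9,
};

// Encode and send the request.
const responseStream = await client.send(
  new InvokeModelWithResponseStreamCommand({
    contentType: "application/json",
    body: JSON.stringify(request),
    modelId,
  }),
);

// Extract and print the response stream in real time.
for await (const event of responseStream.body) {
  /** @type {{ generation: string }} */
  const chunk = JSON.parse(new TextDecoder().decode(event.chunk.bytes));
  if (chunk.generation) {
    process.stdout.write(chunk.generation);
  }
}

Like the quickstarts shown above, this sketch relies on top-level await, so it assumes an ES module context.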
