From 62677d30accf34ff2ce3b7ddd3d59390aebe5dec Mon Sep 17 00:00:00 2001 From: Dennis Traub Date: Thu, 25 Apr 2024 17:06:56 +0200 Subject: [PATCH] Python, Java, JavaScript: Bedrock getting started examples for Llama 2 and 3 (#6375) Co-authored-by: Dennis Traub --- .../metadata/bedrock-runtime_metadata.yaml | 123 ++++++++++++++++-- .../example_code/bedrock-runtime/README.md | 5 +- .../config/foundation_models.js | 12 -- .../meta/llama2/invoke_model_quickstart.js | 54 ++++++++ ...e_model_with_response_stream_quickstart.js | 54 ++++++++ .../meta/llama3/invoke_model_quickstart.js | 59 +++++++++ ...e_model_with_response_stream_quickstart.js | 60 +++++++++ .../models/meta_llama2/llama2_chat.js | 67 ---------- .../tests/meta_llama.integration.test.js | 54 ++++++++ .../tests/meta_llama2.integration.test.js | 25 ---- javav2/example_code/bedrock-runtime/README.md | 9 +- .../BedrockRuntimeUsageDemo.java | 5 - .../example/bedrockruntime/InvokeModel.java | 47 ------- .../bedrockruntime/InvokeModelAsync.java | 61 --------- .../InvokeModelWithResponseStream.java | 3 + .../meta/llama2/InvokeModelQuickstart.java | 55 ++++++++ ...vokeModelWithResponseStreamQuickstart.java | 62 +++++++++ .../meta/llama3/InvokeModelQuickstart.java | 63 +++++++++ ...vokeModelWithResponseStreamQuickstart.java | 70 ++++++++++ .../src/test/java/TestLlama2.java | 26 ++++ .../src/test/java/TestLlama3.java | 26 ++++ .../src/test/java/TextToTextAsyncTest.java | 9 -- .../src/test/java/TextToTextSyncTest.java | 8 -- python/example_code/bedrock-runtime/README.md | 15 ++- .../bedrock_runtime_wrapper.py | 37 ------ .../meta/llama2/invoke_model_quickstart.py | 44 +++++++ ...e_model_with_response_stream_quickstart.py | 46 +++++++ .../meta/llama3/invoke_model_quickstart.py | 50 +++++++ ...e_model_with_response_stream_quickstart.py | 52 ++++++++ .../meta/test_invoke_model_quickstarts.py | 62 +++++++++ .../test/test_bedrock_runtime_wrapper.py | 21 --- python/test_tools/bedrock_runtime_stubber.py | 18 --- 32 files changed, 969 insertions(+), 333 deletions(-) create mode 100644 javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.js create mode 100644 javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.js create mode 100644 javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.js create mode 100644 javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.js delete mode 100644 javascriptv3/example_code/bedrock-runtime/models/meta_llama2/llama2_chat.js create mode 100644 javascriptv3/example_code/bedrock-runtime/tests/meta_llama.integration.test.js delete mode 100644 javascriptv3/example_code/bedrock-runtime/tests/meta_llama2.integration.test.js create mode 100644 javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelQuickstart.java create mode 100644 javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelWithResponseStreamQuickstart.java create mode 100644 javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelQuickstart.java create mode 100644 javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelWithResponseStreamQuickstart.java create mode 100644 javav2/example_code/bedrock-runtime/src/test/java/TestLlama2.java create mode 100644 
javav2/example_code/bedrock-runtime/src/test/java/TestLlama3.java create mode 100644 python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.py create mode 100644 python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.py create mode 100644 python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.py create mode 100644 python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.py create mode 100644 python/example_code/bedrock-runtime/test/models/meta/test_invoke_model_quickstarts.py diff --git a/.doc_gen/metadata/bedrock-runtime_metadata.yaml b/.doc_gen/metadata/bedrock-runtime_metadata.yaml index 88af4fc5bf9..071fcc361f7 100644 --- a/.doc_gen/metadata/bedrock-runtime_metadata.yaml +++ b/.doc_gen/metadata/bedrock-runtime_metadata.yaml @@ -519,10 +519,10 @@ bedrock-runtime_InvokeJurassic2: services: bedrock-runtime: {InvokeModel} -bedrock-runtime_InvokeLlama2: - title: Invoke the Meta Llama 2 Chat model on &BR; for text generation +bedrock-runtime_Llama2_InvokeLlama: + title: Invoke Meta Llama 2 on &BR; using Meta's native request and response payloads title_abbrev: "Meta Llama 2: Text generation" - synopsis: invoke the Meta Llama 2 Chat model on &BR; for text generation. + synopsis: get started sending prompts to Meta Llama 2 and printing the response. category: Invoke model examples languages: Go: @@ -538,20 +538,17 @@ bedrock-runtime_InvokeLlama2: - sdk_version: 2 github: javav2/example_code/bedrock-runtime excerpts: - - description: Asynchronously invoke the Meta Llama 2 Chat foundation model to generate text. + - description: Send your first prompt to Meta Llama 2. snippet_tags: - - bedrock-runtime.java2.invoke_llama2_async.main - - description: Invoke the Meta Llama 2 Chat foundation model to generate text. - snippet_tags: - - bedrock-runtime.java2.invoke_llama2.main + - bedrock-runtime.java2.InvokeModel_Llama2_Quickstart JavaScript: versions: - sdk_version: 3 github: javascriptv3/example_code/bedrock-runtime excerpts: - - description: Invoke the Meta Llama 2 Chat foundation model to generate text. - snippet_files: - - javascriptv3/example_code/bedrock-runtime/models/meta_llama2/llama2_chat.js + - description: Send your first prompt to Meta Llama 2. + snippet_tags: + - javascript.v3.bedrock-runtime.InvokeModel_Llama2_Quickstart PHP: versions: - sdk_version: 3 @@ -565,9 +562,9 @@ bedrock-runtime_InvokeLlama2: - sdk_version: 3 github: python/example_code/bedrock-runtime excerpts: - - description: Invoke the Meta Llama 2 Chat foundation model to generate text. + - description: Send your first prompt to Meta Llama 2. snippet_tags: - - python.example_code.bedrock-runtime.InvokeMetaLlama2 + - python.example_code.bedrock-runtime.InvokeModel_Llama2_Quickstart .NET: versions: - sdk_version: 3 @@ -579,6 +576,106 @@ bedrock-runtime_InvokeLlama2: services: bedrock-runtime: {InvokeModel} +bedrock-runtime_Llama2_InvokeModelWithResponseStream: + title: Invoke Meta Llama 2 on &BR; using Meta's native request and response payloads with a response stream + title_abbrev: "Meta Llama 2: Text generation with response stream" + synopsis: get started sending prompts to Meta Llama 2 and printing the response stream in real-time. + category: Invoke model examples + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 2. 
+ snippet_tags: + - bedrock-runtime.java2.InvokeModelWithResponseStream_Llama2_Quickstart JavaScript: + versions: + - sdk_version: 3 + github: javascriptv3/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 2. + snippet_tags: + - javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart Python: + versions: + - sdk_version: 3 + github: python/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 2. + snippet_tags: + - python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart services: + bedrock-runtime: {InvokeModelWithResponseStream} + + +bedrock-runtime_Llama3_InvokeLlama: + title: Invoke Meta Llama 3 on &BR; using Meta's native request and response payloads + title_abbrev: "Meta Llama 3: Text generation" + synopsis: get started sending prompts to Meta Llama 3 and printing the response. + category: Invoke model examples + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 3. + snippet_tags: + - bedrock-runtime.java2.InvokeModel_Llama3_Quickstart + JavaScript: + versions: + - sdk_version: 3 + github: javascriptv3/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 3. + snippet_tags: + - javascript.v3.bedrock-runtime.InvokeModel_Llama3_Quickstart + Python: + versions: + - sdk_version: 3 + github: python/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 3. + snippet_tags: + - python.example_code.bedrock-runtime.InvokeModel_Llama3_Quickstart + services: + bedrock-runtime: {InvokeModel} + +bedrock-runtime_Llama3_InvokeModelWithResponseStream: + title: Invoke Meta Llama 3 on &BR; using Meta's native request and response payloads with a response stream + title_abbrev: "Meta Llama 3: Text generation with response stream" + synopsis: get started sending prompts to Meta Llama 3 and printing the response stream in real-time. + category: Invoke model examples + languages: + Java: + versions: + - sdk_version: 2 + github: javav2/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 3. + snippet_tags: + - bedrock-runtime.java2.InvokeModelWithResponseStream_Llama3_Quickstart + JavaScript: + versions: + - sdk_version: 3 + github: javascriptv3/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 3. + snippet_tags: + - javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart + Python: + versions: + - sdk_version: 3 + github: python/example_code/bedrock-runtime + excerpts: + - description: Send your first prompt to Meta Llama 3. + snippet_tags: + - python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart + services: + bedrock-runtime: {InvokeModelWithResponseStream} + bedrock-runtime_Scenario_InvokeModels: title: Invoke various foundation models on &BR; title_abbrev: Invoke multiple foundation models on &BR; diff --git a/javascriptv3/example_code/bedrock-runtime/README.md b/javascriptv3/example_code/bedrock-runtime/README.md index 6ba29e94559..234ae1ec4da 100644 --- a/javascriptv3/example_code/bedrock-runtime/README.md +++ b/javascriptv3/example_code/bedrock-runtime/README.md @@ -51,7 +51,10 @@ functions within the same service. 
- [Anthropic Claude 2: Text generation](models/anthropic_claude/claude_2.js) - [Anthropic Claude 3: Text generation](models/anthropic_claude/claude_3.js) - [Anthropic Claude Instant: Text generation](models/anthropic_claude/claude_instant_1.js) -- [Meta Llama 2: Text generation](models/meta_llama2/llama2_chat.js) +- [Meta Llama 2: Text generation](models/meta/llama2/invoke_model_quickstart.js#L4) +- [Meta Llama 2: Text generation with response stream](models/meta/llama2/invoke_model_with_response_stream_quickstart.js#L4) +- [Meta Llama 3: Text generation](models/meta/llama3/invoke_model_quickstart.js#L4) +- [Meta Llama 3: Text generation with response stream](models/meta/llama3/invoke_model_with_response_stream_quickstart.js#L4) - [Mistral AI: Text generation with Mistral 7B Instruct](models/mistral_ai/mistral_7b.js) - [Mistral AI: Text generation with Mixtral 8x7B Instruct](models/mistral_ai/mixtral_8x7b.js) diff --git a/javascriptv3/example_code/bedrock-runtime/config/foundation_models.js b/javascriptv3/example_code/bedrock-runtime/config/foundation_models.js index a695e16774e..1dc2c5e7509 100644 --- a/javascriptv3/example_code/bedrock-runtime/config/foundation_models.js +++ b/javascriptv3/example_code/bedrock-runtime/config/foundation_models.js @@ -50,18 +50,6 @@ export const FoundationModels = Object.freeze({ module: () => import("../models/ai21_labs_jurassic2/jurassic2.js"), invoker: (/** @type {Module} */ module) => module.invokeModel, }, - LLAMA2_CHAT_13B: { - modelId: "meta.llama2-13b-chat-v1", - modelName: "Llama 2 Chat 13B", - module: () => import("../models/meta_llama2/llama2_chat.js"), - invoker: (/** @type {Module} */ module) => module.invokeModel, - }, - LLAMA2_CHAT_70B: { - modelId: "meta.llama2-70b-chat-v1", - modelName: "Llama 2 Chat 70B", - module: () => import("../models/meta_llama2/llama2_chat.js"), - invoker: (/** @type {Module} */ module) => module.invokeModel, - }, MISTRAL_7B: { modelId: "mistral.mistral-7b-instruct-v0:2", modelName: "Mistral 7B Instruct", diff --git a/javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.js b/javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.js new file mode 100644 index 00000000000..210aa789188 --- /dev/null +++ b/javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.js @@ -0,0 +1,54 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +// snippet-start:[javascript.v3.bedrock-runtime.InvokeModel_Llama2_Quickstart] +// Send a prompt to Meta Llama 2 and print the response. + +import { + BedrockRuntimeClient, + InvokeModelCommand, +} from "@aws-sdk/client-bedrock-runtime"; + +// Create a Bedrock Runtime client in the AWS Region of your choice. +const client = new BedrockRuntimeClient({ region: "us-west-2" }); + +// Set the model ID, e.g., Llama 2 Chat 13B. +const modelId = "meta.llama2-13b-chat-v1"; + +// Define the user message to send. +const userMessage = + "Describe the purpose of a 'hello world' program in one sentence."; + +// Embed the message in Llama 2's prompt format. +const prompt = `[INST] ${userMessage} [/INST]`; + +// Format the request payload using the model's native structure. +const request = { + prompt, + // Optional inference parameters: + max_gen_len: 512, + temperature: 0.5, + top_p: 0.9, +}; + +// Encode and send the request. 
+const response = await client.send( + new InvokeModelCommand({ + contentType: "application/json", + body: JSON.stringify(request), + modelId, + }), +); + +// Decode the native response body. +/** @type {{ generation: string }} */ +const nativeResponse = JSON.parse(new TextDecoder().decode(response.body)); + +// Extract and print the generated text. +const responseText = nativeResponse.generation; +console.log(responseText); + +// Learn more about the Llama 2 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2 + +// snippet-end:[javascript.v3.bedrock-runtime.InvokeModel_Llama2_Quickstart] diff --git a/javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.js b/javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.js new file mode 100644 index 00000000000..91012dc0c66 --- /dev/null +++ b/javascriptv3/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.js @@ -0,0 +1,54 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +// snippet-start:[javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart] +// Send a prompt to Meta Llama 2 and print the response stream in real-time. + +import { + BedrockRuntimeClient, + InvokeModelWithResponseStreamCommand, +} from "@aws-sdk/client-bedrock-runtime"; + +// Create a Bedrock Runtime client in the AWS Region of your choice. +const client = new BedrockRuntimeClient({ region: "us-west-2" }); + +// Set the model ID, e.g., Llama 2 Chat 13B. +const modelId = "meta.llama2-13b-chat-v1"; + +// Define the user message to send. +const userMessage = + "Describe the purpose of a 'hello world' program in one sentence."; + +// Embed the message in Llama 2's prompt format. +const prompt = `[INST] ${userMessage} [/INST]`; + +// Format the request payload using the model's native structure. +const request = { + prompt, + // Optional inference parameters: + max_gen_len: 512, + temperature: 0.5, + top_p: 0.9, +}; + +// Encode and send the request. +const responseStream = await client.send( + new InvokeModelWithResponseStreamCommand({ + contentType: "application/json", + body: JSON.stringify(request), + modelId, + }), +); + +// Extract and print the response stream in real-time. +for await (const event of responseStream.body) { + /** @type {{ generation: string }} */ + const chunk = JSON.parse(new TextDecoder().decode(event.chunk.bytes)); + if (chunk.generation) { + process.stdout.write(chunk.generation); + } +} + +// Learn more about the Llama 2 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2 +// snippet-end:[javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart] diff --git a/javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.js b/javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.js new file mode 100644 index 00000000000..5b5abc3a840 --- /dev/null +++ b/javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.js @@ -0,0 +1,59 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +// snippet-start:[javascript.v3.bedrock-runtime.InvokeModel_Llama3_Quickstart] +// Send a prompt to Meta Llama 3 and print the response. 
+ +import { + BedrockRuntimeClient, + InvokeModelCommand, +} from "@aws-sdk/client-bedrock-runtime"; + +// Create a Bedrock Runtime client in the AWS Region of your choice. +const client = new BedrockRuntimeClient({ region: "us-west-2" }); + +// Set the model ID, e.g., Llama 3 8B Instruct. +const modelId = "meta.llama3-8b-instruct-v1:0"; + +// Define the user message to send. +const userMessage = + "Describe the purpose of a 'hello world' program in one sentence."; + +// Embed the message in Llama 3's prompt format. +const prompt = ` +<|begin_of_text|> +<|start_header_id|>user<|end_header_id|> +${userMessage} +<|eot_id|> +<|start_header_id|>assistant<|end_header_id|> +`; + +// Format the request payload using the model's native structure. +const request = { + prompt, + // Optional inference parameters: + max_gen_len: 512, + temperature: 0.5, + top_p: 0.9, +}; + +// Encode and send the request. +const response = await client.send( + new InvokeModelCommand({ + contentType: "application/json", + body: JSON.stringify(request), + modelId, + }), +); + +// Decode the native response body. +/** @type {{ generation: string }} */ +const nativeResponse = JSON.parse(new TextDecoder().decode(response.body)); + +// Extract and print the generated text. +const responseText = nativeResponse.generation; +console.log(responseText); + +// Learn more about the Llama 3 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3 +// snippet-end:[javascript.v3.bedrock-runtime.InvokeModel_Llama3_Quickstart] diff --git a/javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.js b/javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.js new file mode 100644 index 00000000000..daa55bb7c21 --- /dev/null +++ b/javascriptv3/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.js @@ -0,0 +1,60 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +// snippet-start:[javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart] +// Send a prompt to Meta Llama 3 and print the response stream in real-time. + +import { + BedrockRuntimeClient, + InvokeModelWithResponseStreamCommand, +} from "@aws-sdk/client-bedrock-runtime"; + +// Create a Bedrock Runtime client in the AWS Region of your choice. +const client = new BedrockRuntimeClient({ region: "us-west-2" }); + +// Set the model ID, e.g., Llama 3 8B Instruct. +const modelId = "meta.llama3-8b-instruct-v1:0"; + +// Define the user message to send. +const userMessage = + "Describe the purpose of a 'hello world' program in one sentence."; + +// Embed the message in Llama 3's prompt format. +const prompt = ` +<|begin_of_text|> +<|start_header_id|>user<|end_header_id|> +${userMessage} +<|eot_id|> +<|start_header_id|>assistant<|end_header_id|> +`; + +// Format the request payload using the model's native structure. +const request = { + prompt, + // Optional inference parameters: + max_gen_len: 512, + temperature: 0.5, + top_p: 0.9, +}; + +// Encode and send the request. +const responseStream = await client.send( + new InvokeModelWithResponseStreamCommand({ + contentType: "application/json", + body: JSON.stringify(request), + modelId, + }), +); + +// Extract and print the response stream in real-time. 
+for await (const event of responseStream.body) { + /** @type {{ generation: string }} */ + const chunk = JSON.parse(new TextDecoder().decode(event.chunk.bytes)); + if (chunk.generation) { + process.stdout.write(chunk.generation); + } +} + +// Learn more about the Llama 3 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3 +// snippet-end:[javascript.v3.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart] diff --git a/javascriptv3/example_code/bedrock-runtime/models/meta_llama2/llama2_chat.js b/javascriptv3/example_code/bedrock-runtime/models/meta_llama2/llama2_chat.js deleted file mode 100644 index 36210000d17..00000000000 --- a/javascriptv3/example_code/bedrock-runtime/models/meta_llama2/llama2_chat.js +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -import { fileURLToPath } from "url"; - -import { FoundationModels } from "../../config/foundation_models.js"; -import { - BedrockRuntimeClient, - InvokeModelCommand, -} from "@aws-sdk/client-bedrock-runtime"; - -/** - * @typedef {Object} ResponseBody - * @property {generation} text - */ - -/** - * Invokes a Meta Llama 2 Chat model. - * - * @param {string} prompt - The input text prompt for the model to complete. - * @param {string} [modelId] - The ID of the model to use. Defaults to "meta.llama2-13b-chat-v1". - */ -export const invokeModel = async ( - prompt, - modelId = "meta.llama2-13b-chat-v1", -) => { - // Create a new Bedrock Runtime client instance. - const client = new BedrockRuntimeClient({ region: "us-east-1" }); - - // Prepare the payload for the model. - const payload = { - prompt, - temperature: 0.5, - max_gen_len: 1000, - }; - - // Invoke the model with the payload and wait for the response. - const command = new InvokeModelCommand({ - contentType: "application/json", - body: JSON.stringify(payload), - modelId, - }); - const apiResponse = await client.send(command); - - // Decode and return the response. - const decodedResponseBody = new TextDecoder().decode(apiResponse.body); - /** @type {ResponseBody} */ - const responseBody = JSON.parse(decodedResponseBody); - return responseBody.generation; -}; - -// Invoke the function if this file was run directly. -if (process.argv[1] === fileURLToPath(import.meta.url)) { - const prompt = - 'Complete the following in one sentence: "Once upon a time..."'; - const modelId = FoundationModels.LLAMA2_CHAT_13B.modelId; - console.log(`Prompt: ${prompt}`); - console.log(`Model ID: ${modelId}`); - - try { - console.log("-".repeat(53)); - const response = await invokeModel(prompt, modelId); - console.log(response); - } catch (err) { - console.log(err); - } -} diff --git a/javascriptv3/example_code/bedrock-runtime/tests/meta_llama.integration.test.js b/javascriptv3/example_code/bedrock-runtime/tests/meta_llama.integration.test.js new file mode 100644 index 00000000000..f522cfec44d --- /dev/null +++ b/javascriptv3/example_code/bedrock-runtime/tests/meta_llama.integration.test.js @@ -0,0 +1,54 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { vi, describe, it, expect } from "vitest"; + +/** + * Integration tests for: + * - models/meta/llama2/*.js + * - models/meta/llama3/*.js + */ + +describe("Running the Llama2 InvokeModel quickstart", () => { + it("should run and log the model's response", async () => { + const log = vi.spyOn(console, "log").mockImplementation(() => {}); + await import("../models/meta/llama2/invoke_model_quickstart.js"); + expect(log).toHaveBeenCalledTimes(1); + log.mockRestore(); + }); +}); + +describe("Running the Llama2 InvokeModelWithResponseStream quickstart", () => { + it("should run and log the model's response", async () => { + const write = vi + .spyOn(process.stdout, "write") + .mockImplementation(() => {}); + await import( + "../models/meta/llama2/invoke_model_with_response_stream_quickstart.js" + ); + expect(write).toHaveBeenCalled(); + write.mockRestore(); + }); +}); + +describe("Running the Llama3 InvokeModel quickstart", () => { + it("should run and log the model's response", async () => { + const log = vi.spyOn(console, "log").mockImplementation(() => {}); + await import("../models/meta/llama3/invoke_model_quickstart.js"); + expect(log).toHaveBeenCalledTimes(1); + log.mockRestore(); + }); +}); + +describe("Running the Llama3 InvokeModelWithResponseStream quickstart", () => { + it("should run and log the model's response", async () => { + const write = vi + .spyOn(process.stdout, "write") + .mockImplementation(() => {}); + await import( + "../models/meta/llama3/invoke_model_with_response_stream_quickstart.js" + ); + expect(write).toHaveBeenCalled(); + write.mockRestore(); + }); +}); diff --git a/javascriptv3/example_code/bedrock-runtime/tests/meta_llama2.integration.test.js b/javascriptv3/example_code/bedrock-runtime/tests/meta_llama2.integration.test.js deleted file mode 100644 index cd00ef0811e..00000000000 --- a/javascriptv3/example_code/bedrock-runtime/tests/meta_llama2.integration.test.js +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -import { describe, it } from "vitest"; -import { FoundationModels } from "../config/foundation_models.js"; -import { expectToBeANonEmptyString } from "./test_tools.js"; -import { invokeModel } from "../models/meta_llama2/llama2_chat.js"; - -const TEXT_PROMPT = "Hello, this is a test prompt"; - -describe("Invoke Llama2 Chat 13B", () => { - it("should return a response", async () => { - const modelId = FoundationModels.LLAMA2_CHAT_13B.modelId; - const response = await invokeModel(TEXT_PROMPT, modelId); - expectToBeANonEmptyString(response); - }); -}); - -describe("Invoke Llama2 Chat 70B", () => { - it("should return a response", async () => { - const modelId = FoundationModels.LLAMA2_CHAT_70B.modelId; - const response = await invokeModel(TEXT_PROMPT, modelId); - expectToBeANonEmptyString(response); - }); -}); diff --git a/javav2/example_code/bedrock-runtime/README.md b/javav2/example_code/bedrock-runtime/README.md index 0d012f2704b..2aceb3cda3f 100644 --- a/javav2/example_code/bedrock-runtime/README.md +++ b/javav2/example_code/bedrock-runtime/README.md @@ -41,14 +41,17 @@ functions within the same service. 
### Invoke model examples - [AI21 Labs Jurassic-2: Text generation](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L205) -- [Amazon Titan: Image generation](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L399) +- [Amazon Titan: Image generation](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L338) - [Anthropic Claude 2: Real-time response stream processing](src/main/java/com/example/bedrockruntime/Claude2.java#L65) - [Anthropic Claude 2: Text generation](src/main/java/com/example/bedrockruntime/InvokeModel.java#L112) - [Anthropic Claude 3: Real-time response stream processing](src/main/java/com/example/bedrockruntime/Claude3.java#L49) -- [Meta Llama 2: Text generation](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L268) +- [Meta Llama 2: Text generation](src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelQuickstart.java#L11) +- [Meta Llama 2: Text generation with response stream](src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelWithResponseStreamQuickstart.java#L12) +- [Meta Llama 3: Text generation](src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelQuickstart.java#L13) +- [Meta Llama 3: Text generation with response stream](src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelWithResponseStreamQuickstart.java#L14) - [Mistral AI: Text generation with Mistral 7B Instruct](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L33) - [Mistral AI: Text generation with Mixtral 8x7B Instruct](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L88) -- [Stable Diffusion: Image generation](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L329) +- [Stable Diffusion: Image generation](src/main/java/com/example/bedrockruntime/InvokeModelAsync.java#L268) diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/BedrockRuntimeUsageDemo.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/BedrockRuntimeUsageDemo.java index f74ccd8ab80..0d28151d0ed 100644 --- a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/BedrockRuntimeUsageDemo.java +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/BedrockRuntimeUsageDemo.java @@ -27,7 +27,6 @@ public class BedrockRuntimeUsageDemo { private static final String CLAUDE = "anthropic.claude-v2"; private static final String JURASSIC2 = "ai21.j2-mid-v1"; - private static final String LLAMA2 = "meta.llama2-13b-chat-v1"; private static final String MISTRAL7B = "mistral.mistral-7b-instruct-v0:2"; private static final String MIXTRAL8X7B = "mistral.mixtral-8x7b-instruct-v0:1"; private static final String STABLE_DIFFUSION = "stability.stable-diffusion-xl"; @@ -44,7 +43,6 @@ private static void textToText() { String prompt = "In one sentence, what is a large-language model?"; BedrockRuntimeUsageDemo.invoke(CLAUDE, prompt); BedrockRuntimeUsageDemo.invoke(JURASSIC2, prompt); - BedrockRuntimeUsageDemo.invoke(LLAMA2, prompt); BedrockRuntimeUsageDemo.invoke(MISTRAL7B, prompt); BedrockRuntimeUsageDemo.invoke(MIXTRAL8X7B, prompt); } @@ -66,9 +64,6 @@ private static void invoke(String modelId, String prompt, String stylePreset) { case JURASSIC2: printResponse(invokeJurassic2(prompt)); break; - case LLAMA2: - printResponse(invokeLlama2(prompt)); - break; case MISTRAL7B: for (String response : invokeMistral7B(prompt)) { printResponse(response); diff --git 
a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModel.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModel.java index c25c0467e18..8204ffde3b2 100644 --- a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModel.java +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModel.java @@ -208,53 +208,6 @@ public static String invokeJurassic2(String prompt) { } // snippet-end:[bedrock-runtime.java2.invoke_jurassic2.main] - // snippet-start:[bedrock-runtime.java2.invoke_llama2.main] - /** - * Invokes the Meta Llama 2 Chat model to run an inference based on the provided - * input. - * - * @param prompt The prompt for Llama 2 to complete. - * @return The generated response. - */ - public static String invokeLlama2(String prompt) { - /* - * The different model providers have individual request and response formats. - * For the format, ranges, and default values for Meta Llama 2 Chat, refer to: - * https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-meta. - * html - */ - - String llama2ModelId = "meta.llama2-13b-chat-v1"; - - BedrockRuntimeClient client = BedrockRuntimeClient.builder() - .region(Region.US_EAST_1) - .credentialsProvider(ProfileCredentialsProvider.create()) - .build(); - - String payload = new JSONObject() - .put("prompt", prompt) - .put("max_gen_len", 512) - .put("temperature", 0.5) - .put("top_p", 0.9) - .toString(); - - InvokeModelRequest request = InvokeModelRequest.builder() - .body(SdkBytes.fromUtf8String(payload)) - .modelId(llama2ModelId) - .contentType("application/json") - .accept("application/json") - .build(); - - InvokeModelResponse response = client.invokeModel(request); - - JSONObject responseBody = new JSONObject(response.body().asUtf8String()); - - String generatedText = responseBody.getString("generation"); - - return generatedText; - } - // snippet-end:[bedrock-runtime.java2.invoke_llama2.main] - // snippet-start:[bedrock-runtime.java2.invoke_stable_diffusion.main] /** * Invokes the Stability.ai Stable Diffusion XL model to create an image based diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelAsync.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelAsync.java index ca80b8a8d64..ef4361ee850 100644 --- a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelAsync.java +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelAsync.java @@ -265,67 +265,6 @@ public static String invokeJurassic2(String prompt) { } // snippet-end:[bedrock-runtime.java2.invoke_jurassic-2_async.main] - // snippet-start:[bedrock-runtime.java2.invoke_llama2_async.main] - /** - * Asynchronously invokes the Meta Llama 2 Chat model to run an inference based - * on the provided input. - * - * @param prompt The prompt that you want Llama 2 to complete. - * @return The inference response generated by the model. - */ - public static String invokeLlama2(String prompt) { - /* - * The different model providers have individual request and response formats. - * For the format, ranges, and default values for Meta Llama 2 Chat, refer to: - * https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-meta. 
- * html - */ - - String llama2ModelId = "meta.llama2-13b-chat-v1"; - - BedrockRuntimeAsyncClient client = BedrockRuntimeAsyncClient.builder() - .region(Region.US_EAST_1) - .credentialsProvider(ProfileCredentialsProvider.create()) - .build(); - - String payload = new JSONObject() - .put("prompt", prompt) - .put("max_gen_len", 512) - .put("temperature", 0.5) - .put("top_p", 0.9) - .toString(); - - InvokeModelRequest request = InvokeModelRequest.builder() - .body(SdkBytes.fromUtf8String(payload)) - .modelId(llama2ModelId) - .contentType("application/json") - .accept("application/json") - .build(); - - CompletableFuture completableFuture = client.invokeModel(request) - .whenComplete((response, exception) -> { - if (exception != null) { - System.out.println("Model invocation failed: " + exception); - } - }); - - String generatedText = ""; - try { - InvokeModelResponse response = completableFuture.get(); - JSONObject responseBody = new JSONObject(response.body().asUtf8String()); - generatedText = responseBody.getString("generation"); - - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - System.err.println(e.getMessage()); - } catch (ExecutionException e) { - System.err.println(e.getMessage()); - } - - return generatedText; - } - // snippet-end:[bedrock-runtime.java2.invoke_llama2_async.main] - // snippet-start:[bedrock-runtime.java2.invoke_stable_diffusion_async.main] /** * Asynchronously invokes the Stability.ai Stable Diffusion XL model to create diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelWithResponseStream.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelWithResponseStream.java index abe422469d3..7da4489b347 100644 --- a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelWithResponseStream.java +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/InvokeModelWithResponseStream.java @@ -4,6 +4,8 @@ package com.example.bedrockruntime; // snippet-start:[bedrock-runtime.java2.invoke_model_with_response_stream.import] + +import org.json.JSONArray; import org.json.JSONObject; import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider; import software.amazon.awssdk.core.SdkBytes; @@ -12,6 +14,7 @@ import software.amazon.awssdk.services.bedrockruntime.model.InvokeModelWithResponseStreamRequest; import software.amazon.awssdk.services.bedrockruntime.model.InvokeModelWithResponseStreamResponseHandler; +import java.util.Iterator; import java.util.concurrent.atomic.AtomicReference; // snippet-end:[bedrock-runtime.java2.invoke_model_with_response_stream.import] diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelQuickstart.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelQuickstart.java new file mode 100644 index 00000000000..174538d845f --- /dev/null +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelQuickstart.java @@ -0,0 +1,55 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +package com.example.bedrockruntime.models.meta.llama2; + +import org.json.JSONObject; +import software.amazon.awssdk.core.SdkBytes; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.bedrockruntime.BedrockRuntimeClient; + +// snippet-start:[bedrock-runtime.java2.InvokeModel_Llama2_Quickstart] +// Send a prompt to Meta Llama 2 and print the response. +public class InvokeModelQuickstart { + + public static void main(String[] args) { + + // Create a Bedrock Runtime client in the AWS Region of your choice. + var client = BedrockRuntimeClient.builder() + .region(Region.US_WEST_2) + .build(); + + // Set the model ID, e.g., Llama 2 Chat 13B. + var modelId = "meta.llama2-13b-chat-v1"; + + // Define the user message to send. + var userMessage = "Describe the purpose of a 'hello world' program in one line."; + + // Embed the message in Llama 2's prompt format. + var prompt = "[INST] " + userMessage + " [/INST]"; + + // Create a JSON payload using the model's native structure. + var request = new JSONObject() + .put("prompt", prompt) + // Optional inference parameters: + .put("max_gen_len", 512) + .put("temperature", 0.5F) + .put("top_p", 0.9F); + + // Encode and send the request. + var response = client.invokeModel(req -> req + .body(SdkBytes.fromUtf8String(request.toString())) + .modelId(modelId)); + + // Decode the native response body. + var nativeResponse = new JSONObject(response.body().asUtf8String()); + + // Extract and print the response text. + var responseText = nativeResponse.getString("generation"); + System.out.println(responseText); + } +} +// Learn more about the Llama 2 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2 + +// snippet-end:[bedrock-runtime.java2.InvokeModel_Llama2_Quickstart] diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelWithResponseStreamQuickstart.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelWithResponseStreamQuickstart.java new file mode 100644 index 00000000000..e2ea5511f18 --- /dev/null +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama2/InvokeModelWithResponseStreamQuickstart.java @@ -0,0 +1,62 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.example.bedrockruntime.models.meta.llama2; + +import org.json.JSONObject; +import software.amazon.awssdk.core.SdkBytes; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.bedrockruntime.BedrockRuntimeAsyncClient; +import software.amazon.awssdk.services.bedrockruntime.model.InvokeModelWithResponseStreamResponseHandler; + +// snippet-start:[bedrock-runtime.java2.InvokeModelWithResponseStream_Llama2_Quickstart] +// Send a prompt to Meta Llama 2 and print the response stream in real-time. +public class InvokeModelWithResponseStreamQuickstart { + + public static void main(String[] args) { + + // Create a Bedrock Runtime client in the AWS Region of your choice. + var client = BedrockRuntimeAsyncClient.builder() + .region(Region.US_WEST_2) + .build(); + + // Set the model ID, e.g., Llama 2 Chat 13B. + var modelId = "meta.llama2-13b-chat-v1"; + + // Define the user message to send. 
+ var userMessage = "Describe the purpose of a 'hello world' program in one line."; + + // Embed the message in Llama 2's prompt format. + var prompt = "[INST] " + userMessage + " [/INST]"; + + // Create a JSON payload using the model's native structure. + var request = new JSONObject() + .put("prompt", prompt) + // Optional inference parameters: + .put("max_gen_len", 512) + .put("temperature", 0.5F) + .put("top_p", 0.9F); + + // Create a handler to extract and print the response text in real-time. + var streamHandler = InvokeModelWithResponseStreamResponseHandler.builder() + .subscriber(event -> event.accept( + InvokeModelWithResponseStreamResponseHandler.Visitor.builder() + .onChunk(c -> { + var chunk = new JSONObject(c.bytes().asUtf8String()); + if (chunk.has("generation")) { + System.out.print(chunk.getString("generation")); + } + }).build()) + ).build(); + + // Encode and send the request. Let the stream handler process the response. + client.invokeModelWithResponseStream(req -> req + .body(SdkBytes.fromUtf8String(request.toString())) + .modelId(modelId), streamHandler + ).join(); + } +} +// Learn more about the Llama 2 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2 + +// snippet-end:[bedrock-runtime.java2.InvokeModelWithResponseStream_Llama2_Quickstart] diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelQuickstart.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelQuickstart.java new file mode 100644 index 00000000000..1652157e14b --- /dev/null +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelQuickstart.java @@ -0,0 +1,63 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.example.bedrockruntime.models.meta.llama3; + +import org.json.JSONObject; +import software.amazon.awssdk.core.SdkBytes; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.bedrockruntime.BedrockRuntimeClient; + +import java.text.MessageFormat; + +// snippet-start:[bedrock-runtime.java2.InvokeModel_Llama3_Quickstart] +// Send a prompt to Meta Llama 3 and print the response. +public class InvokeModelQuickstart { + + public static void main(String[] args) { + + // Create a Bedrock Runtime client in the AWS Region of your choice. + var client = BedrockRuntimeClient.builder() + .region(Region.US_WEST_2) + .build(); + + // Set the model ID, e.g., Llama 3 8B Instruct. + var modelId = "meta.llama3-8b-instruct-v1:0"; + + // Define the user message to send. + var userMessage = "Describe the purpose of a 'hello world' program in one line."; + + // Embed the message in Llama 3's prompt format. + var prompt = MessageFormat.format(""" + <|begin_of_text|> + <|start_header_id|>user<|end_header_id|> + {0} + <|eot_id|> + <|start_header_id|>assistant<|end_header_id|> + """, userMessage); + + // Create a JSON payload using the model's native structure. + var request = new JSONObject() + .put("prompt", prompt) + // Optional inference parameters: + .put("max_gen_len", 512) + .put("temperature", 0.5F) + .put("top_p", 0.9F); + + // Encode and send the request. + var response = client.invokeModel(req -> req + .body(SdkBytes.fromUtf8String(request.toString())) + .modelId(modelId)); + + // Decode the native response body. 
+ var nativeResponse = new JSONObject(response.body().asUtf8String()); + + // Extract and print the response text. + var responseText = nativeResponse.getString("generation"); + System.out.println(responseText); + } +} +// Learn more about the Llama 3 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3 + +// snippet-end:[bedrock-runtime.java2.InvokeModel_Llama3_Quickstart] diff --git a/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelWithResponseStreamQuickstart.java b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelWithResponseStreamQuickstart.java new file mode 100644 index 00000000000..845c8c434ef --- /dev/null +++ b/javav2/example_code/bedrock-runtime/src/main/java/com/example/bedrockruntime/models/meta/llama3/InvokeModelWithResponseStreamQuickstart.java @@ -0,0 +1,70 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.example.bedrockruntime.models.meta.llama3; + +import org.json.JSONObject; +import software.amazon.awssdk.core.SdkBytes; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.bedrockruntime.BedrockRuntimeAsyncClient; +import software.amazon.awssdk.services.bedrockruntime.model.InvokeModelWithResponseStreamResponseHandler; + +import java.text.MessageFormat; + +// snippet-start:[bedrock-runtime.java2.InvokeModelWithResponseStream_Llama3_Quickstart] +// Send a prompt to Meta Llama 3 and print the response stream in real-time. +public class InvokeModelWithResponseStreamQuickstart { + + public static void main(String[] args) { + + // Create a Bedrock Runtime client in the AWS Region of your choice. + var client = BedrockRuntimeAsyncClient.builder() + .region(Region.US_WEST_2) + .build(); + + // Set the model ID, e.g., Llama 3 8B Instruct. + var modelId = "meta.llama3-8b-instruct-v1:0"; + + // Define the user message to send. + var userMessage = "Describe the purpose of a 'hello world' program in one line."; + + // Embed the message in Llama 3's prompt format. + var prompt = MessageFormat.format(""" + <|begin_of_text|> + <|start_header_id|>user<|end_header_id|> + {0} + <|eot_id|> + <|start_header_id|>assistant<|end_header_id|> + """, userMessage); + + // Create a JSON payload using the model's native structure. + var request = new JSONObject() + .put("prompt", prompt) + // Optional inference parameters: + .put("max_gen_len", 512) + .put("temperature", 0.5F) + .put("top_p", 0.9F); + + // Create a handler to extract and print the response text in real-time. + var streamHandler = InvokeModelWithResponseStreamResponseHandler.builder() + .subscriber(event -> event.accept( + InvokeModelWithResponseStreamResponseHandler.Visitor.builder() + .onChunk(c -> { + var chunk = new JSONObject(c.bytes().asUtf8String()); + if (chunk.has("generation")) { + System.out.print(chunk.getString("generation")); + } + }).build()) + ).build(); + + // Encode and send the request. Let the stream handler process the response. 
+ client.invokeModelWithResponseStream(req -> req + .body(SdkBytes.fromUtf8String(request.toString())) + .modelId(modelId), streamHandler + ).join(); + } +} +// Learn more about the Llama 3 prompt format at: +// https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3 + +// snippet-end:[bedrock-runtime.java2.InvokeModelWithResponseStream_Llama3_Quickstart] diff --git a/javav2/example_code/bedrock-runtime/src/test/java/TestLlama2.java b/javav2/example_code/bedrock-runtime/src/test/java/TestLlama2.java new file mode 100644 index 00000000000..17f965fe9a6 --- /dev/null +++ b/javav2/example_code/bedrock-runtime/src/test/java/TestLlama2.java @@ -0,0 +1,26 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import com.example.bedrockruntime.models.meta.llama2.InvokeModelQuickstart; +import com.example.bedrockruntime.models.meta.llama2.InvokeModelWithResponseStreamQuickstart; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +@Tag("IntegrationTest") +@TestInstance(TestInstance.Lifecycle.PER_METHOD) +public class TestLlama2 { + + @Test + void InvokeModel() { + assertDoesNotThrow(() -> InvokeModelQuickstart.main(null)); + } + + @Test + void InvokeModelWithResponseStream() { + assertDoesNotThrow(() -> InvokeModelWithResponseStreamQuickstart.main(null)); + } + +} diff --git a/javav2/example_code/bedrock-runtime/src/test/java/TestLlama3.java b/javav2/example_code/bedrock-runtime/src/test/java/TestLlama3.java new file mode 100644 index 00000000000..5277dcaf592 --- /dev/null +++ b/javav2/example_code/bedrock-runtime/src/test/java/TestLlama3.java @@ -0,0 +1,26 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import com.example.bedrockruntime.models.meta.llama3.InvokeModelQuickstart; +import com.example.bedrockruntime.models.meta.llama3.InvokeModelWithResponseStreamQuickstart; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +@Tag("IntegrationTest") +@TestInstance(TestInstance.Lifecycle.PER_METHOD) +public class TestLlama3 { + + @Test + void InvokeModel() { + assertDoesNotThrow(() -> InvokeModelQuickstart.main(null)); + } + + @Test + void InvokeModelWithResponseStream() { + assertDoesNotThrow(() -> InvokeModelWithResponseStreamQuickstart.main(null)); + } + +} diff --git a/javav2/example_code/bedrock-runtime/src/test/java/TextToTextAsyncTest.java b/javav2/example_code/bedrock-runtime/src/test/java/TextToTextAsyncTest.java index 652689c227a..91ba1db87ff 100644 --- a/javav2/example_code/bedrock-runtime/src/test/java/TextToTextAsyncTest.java +++ b/javav2/example_code/bedrock-runtime/src/test/java/TextToTextAsyncTest.java @@ -58,15 +58,6 @@ void InvokeJurassic2Async() { System.out.println("Test async invoke Jurassic-2 passed."); } - @Test - @Tag("IntegrationTest") - void InvokeLlama2Async() { - var prompt = "In one sentence, what is a large-language model?"; - var generatedText = InvokeModelAsync.invokeLlama2(prompt); - assertNotNullOrEmpty(generatedText); - System.out.println("Test async invoke Llama 2 passed."); - } - @Test @Tag("IntegrationTest") void InvokeClaude3WithResponseStream() { diff --git a/javav2/example_code/bedrock-runtime/src/test/java/TextToTextSyncTest.java b/javav2/example_code/bedrock-runtime/src/test/java/TextToTextSyncTest.java index 396cb7fd565..3d70c9b7dc7 100644 --- a/javav2/example_code/bedrock-runtime/src/test/java/TextToTextSyncTest.java +++ b/javav2/example_code/bedrock-runtime/src/test/java/TextToTextSyncTest.java @@ -53,12 +53,4 @@ void InvokeJurassic2() { assertNotNullOrEmpty(completion); System.out.println("Test sync invoke Jurassic-2 passed."); } - - @Test - @Tag("IntegrationTest") - void InvokeLlama2() { - String completion = InvokeModel.invokeLlama2("In one sentence, what is a large-language model?"); - assertNotNullOrEmpty(completion); - System.out.println("Test sync invoke Llama 2 passed."); - } } diff --git a/python/example_code/bedrock-runtime/README.md b/python/example_code/bedrock-runtime/README.md index 6feee8bc3af..2d3fd43ff1b 100644 --- a/python/example_code/bedrock-runtime/README.md +++ b/python/example_code/bedrock-runtime/README.md @@ -39,15 +39,18 @@ python -m pip install -r requirements.txt ### Invoke model examples - [AI21 Labs Jurassic-2: Text generation](bedrock_runtime_wrapper.py#L79) -- [Amazon Titan: Image generation](bedrock_runtime_wrapper.py#L275) -- [Anthropic Claude 2: Real-time response stream processing](bedrock_runtime_wrapper.py#L320) +- [Amazon Titan: Image generation](bedrock_runtime_wrapper.py#L238) +- [Anthropic Claude 2: Real-time response stream processing](bedrock_runtime_wrapper.py#L283) - [Anthropic Claude 2: Text generation](bedrock_runtime_wrapper.py#L39) - [Anthropic Claude 3: Multimodal invocation](models/anthropic/claude_3.py#L94) - [Anthropic Claude 3: Text generation](models/anthropic/claude_3.py#L33) -- [Meta Llama 2: Text generation](bedrock_runtime_wrapper.py#L115) -- [Mistral AI: Text generation with Mistral 7B Instruct](bedrock_runtime_wrapper.py#L152) -- [Mistral AI: Text generation with Mixtral 8x7B 
Instruct](bedrock_runtime_wrapper.py#L192) -- [Stable Diffusion: Image generation](bedrock_runtime_wrapper.py#L232) +- [Meta Llama 2: Text generation](models/meta/llama2/invoke_model_quickstart.py#L4) +- [Meta Llama 2: Text generation with response stream](models/meta/llama2/invoke_model_with_response_stream_quickstart.py#L4) +- [Meta Llama 3: Text generation](models/meta/llama3/invoke_model_quickstart.py#L4) +- [Meta Llama 3: Text generation with response stream](models/meta/llama3/invoke_model_with_response_stream_quickstart.py#L4) +- [Mistral AI: Text generation with Mistral 7B Instruct](bedrock_runtime_wrapper.py#L115) +- [Mistral AI: Text generation with Mixtral 8x7B Instruct](bedrock_runtime_wrapper.py#L155) +- [Stable Diffusion: Image generation](bedrock_runtime_wrapper.py#L195) diff --git a/python/example_code/bedrock-runtime/bedrock_runtime_wrapper.py b/python/example_code/bedrock-runtime/bedrock_runtime_wrapper.py index f1ce10bcf32..abfea8f5f71 100644 --- a/python/example_code/bedrock-runtime/bedrock_runtime_wrapper.py +++ b/python/example_code/bedrock-runtime/bedrock_runtime_wrapper.py @@ -112,43 +112,6 @@ def invoke_jurassic2(self, prompt): # snippet-end:[python.example_code.bedrock-runtime.InvokeAi21Jurassic2] - # snippet-start:[python.example_code.bedrock-runtime.InvokeMetaLlama2] - def invoke_llama2(self, prompt): - """ - Invokes the Meta Llama 2 large-language model to run an inference - using the input provided in the request body. - - :param prompt: The prompt that you want Llama 2 to complete. - :return: Inference response from the model. - """ - - try: - # The different model providers have individual request and response formats. - # For the format, ranges, and default values for Meta Llama 2 Chat, refer to: - # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-meta.html - - body = { - "prompt": prompt, - "temperature": 0.5, - "top_p": 0.9, - "max_gen_len": 512, - } - - response = self.bedrock_runtime_client.invoke_model( - modelId="meta.llama2-13b-chat-v1", body=json.dumps(body) - ) - - response_body = json.loads(response["body"].read()) - completion = response_body["generation"] - - return completion - - except ClientError: - logger.error("Couldn't invoke Llama 2") - raise - - # snippet-end:[python.example_code.bedrock-runtime.InvokeMetaLlama2] - # snippet-start:[python.example_code.bedrock-runtime.InvokeMistral7B] def invoke_mistral_7b(self, prompt): """ diff --git a/python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.py b/python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.py new file mode 100644 index 00000000000..7b5c2bc1d65 --- /dev/null +++ b/python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_quickstart.py @@ -0,0 +1,44 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +# snippet-start:[python.example_code.bedrock-runtime.InvokeModel_Llama2_Quickstart] +# Send a prompt to Meta Llama 2 and print the response. + +import boto3 +import json + +# Create a Bedrock Runtime client in the AWS Region of your choice. +client = boto3.client("bedrock-runtime", region_name="us-west-2") + +# Set the model ID, e.g., Llama 2 Chat 13B. +model_id = "meta.llama2-13b-chat-v1" + +# Define the user message to send. +user_message = "Describe the purpose of a 'hello world' program in one line." + +# Embed the message in Llama 2's prompt format. 
+prompt = f"[INST] {user_message} [/INST]" + +# Format the request payload using the model's native structure. +request = { + "prompt": prompt, + # Optional inference parameters: + "max_gen_len": 512, + "temperature": 0.5, + "top_p": 0.9, +} + +# Encode and send the request. +response = client.invoke_model(body=json.dumps(request), modelId=model_id) + +# Decode the native response body. +model_response = json.loads(response["body"].read()) + +# Extract and print the generated text. +response_text = model_response["generation"] +print(response_text) + +# Learn more about the Llama 2 prompt format at: +# https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2 + +# snippet-end:[python.example_code.bedrock-runtime.InvokeModel_Llama2_Quickstart] diff --git a/python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.py b/python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.py new file mode 100644 index 00000000000..92961a3d895 --- /dev/null +++ b/python/example_code/bedrock-runtime/models/meta/llama2/invoke_model_with_response_stream_quickstart.py @@ -0,0 +1,46 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +# snippet-start:[python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart] +# Send a prompt to Meta Llama 2 and print the response stream in real-time. + +import boto3 +import json + +# Create a Bedrock Runtime client in the AWS Region of your choice. +client = boto3.client("bedrock-runtime", region_name="us-west-2") + +# Set the model ID, e.g., Llama 2 Chat 13B. +model_id = "meta.llama2-13b-chat-v1" + +# Define the user message to send. +user_message = "Describe the purpose of a 'hello world' program in one line." + +# Embed the message in Llama 2's prompt format. +prompt = f"[INST] {user_message} [/INST]" + +# Format the request payload using the model's native structure. +request = { + "prompt": prompt, + # Optional inference parameters: + "max_gen_len": 512, + "temperature": 0.5, + "top_p": 0.9, +} + +# Encode and send the request. +response_stream = client.invoke_model_with_response_stream( + body=json.dumps(request), + modelId=model_id, +) + +# Extract and print the response text in real-time. +for event in response_stream["body"]: + chunk = json.loads(event["chunk"]["bytes"]) + if "generation" in chunk: + print(chunk["generation"], end="") + +# Learn more about the Llama 2 prompt format at: +# https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2 + +# snippet-end:[python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama2_Quickstart] diff --git a/python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.py b/python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.py new file mode 100644 index 00000000000..9aeceb38549 --- /dev/null +++ b/python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_quickstart.py @@ -0,0 +1,50 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +# snippet-start:[python.example_code.bedrock-runtime.InvokeModel_Llama3_Quickstart] +# Send a prompt to Meta Llama 3 and print the response. + +import boto3 +import json + +# Create a Bedrock Runtime client in the AWS Region of your choice. +client = boto3.client("bedrock-runtime", region_name="us-west-2") + +# Set the model ID, e.g., Llama 3 8B Instruct. 
+model_id = "meta.llama3-8b-instruct-v1:0" + +# Define the user message to send. +user_message = "Describe the purpose of a 'hello world' program in one line." + +# Embed the message in Llama 3's prompt format. +prompt = f""" +<|begin_of_text|> +<|start_header_id|>user<|end_header_id|> +{user_message} +<|eot_id|> +<|start_header_id|>assistant<|end_header_id|> +""" + +# Format the request payload using the model's native structure. +request = { + "prompt": prompt, + # Optional inference parameters: + "max_gen_len": 512, + "temperature": 0.5, + "top_p": 0.9, +} + +# Encode and send the request. +response = client.invoke_model(body=json.dumps(request), modelId=model_id) + +# Decode the native response body. +model_response = json.loads(response["body"].read()) + +# Extract and print the generated text. +response_text = model_response["generation"] +print(response_text) + +# Learn more about the Llama 3 prompt format in the documentation: +# https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3 + +# snippet-end:[python.example_code.bedrock-runtime.InvokeModel_Llama3_Quickstart] diff --git a/python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.py b/python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.py new file mode 100644 index 00000000000..c86ad3a422f --- /dev/null +++ b/python/example_code/bedrock-runtime/models/meta/llama3/invoke_model_with_response_stream_quickstart.py @@ -0,0 +1,52 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +# snippet-start:[python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart] +# Send a prompt to Meta Llama 3 and print the response stream in real-time. + +import boto3 +import json + +# Create a Bedrock Runtime client in the AWS Region of your choice. +client = boto3.client("bedrock-runtime", region_name="us-west-2") + +# Set the model ID, e.g., Llama 3 8B Instruct. +model_id = "meta.llama3-8b-instruct-v1:0" + +# Define the user message to send. +user_message = "Describe the purpose of a 'hello world' program in one line." + +# Embed the message in Llama 3's prompt format. +prompt = f""" +<|begin_of_text|> +<|start_header_id|>user<|end_header_id|> +{user_message} +<|eot_id|> +<|start_header_id|>assistant<|end_header_id|> +""" + +# Format the request payload using the model's native structure. +request = { + "prompt": prompt, + # Optional inference parameters: + "max_gen_len": 512, + "temperature": 0.5, + "top_p": 0.9, +} + +# Encode and send the request. +response_stream = client.invoke_model_with_response_stream( + body=json.dumps(request), + modelId=model_id, +) + +# Extract and print the response text in real-time. 
+for event in response_stream["body"]:
+    chunk = json.loads(event["chunk"]["bytes"])
+    if "generation" in chunk:
+        print(chunk["generation"], end="")
+
+# Learn more about the Llama 3 prompt format at:
+# https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#special-tokens-used-with-meta-llama-3
+
+# snippet-end:[python.example_code.bedrock-runtime.InvokeModelWithResponseStream_Llama3_Quickstart]
diff --git a/python/example_code/bedrock-runtime/test/models/meta/test_invoke_model_quickstarts.py b/python/example_code/bedrock-runtime/test/models/meta/test_invoke_model_quickstarts.py
new file mode 100644
index 00000000000..966e001f706
--- /dev/null
+++ b/python/example_code/bedrock-runtime/test/models/meta/test_invoke_model_quickstarts.py
@@ -0,0 +1,62 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Integration tests for:
+- models/meta/llama2/invoke_model_quickstart.py
+- models/meta/llama3/invoke_model_quickstart.py
+"""
+
+import pytest
+import subprocess
+import sys
+
+
+@pytest.mark.integ
+def test_llama2_quickstart():
+    result = subprocess.run(
+        [sys.executable, "models/meta/llama2/invoke_model_quickstart.py"],
+        capture_output=True,
+        text=True,
+    )
+    assert result.stdout != ""
+    assert result.returncode == 0
+
+
+@pytest.mark.integ
+def test_llama2_with_response_stream_quickstart():
+    result = subprocess.run(
+        [
+            sys.executable,
+            "models/meta/llama2/invoke_model_with_response_stream_quickstart.py",
+        ],
+        capture_output=True,
+        text=True,
+    )
+    assert result.stdout != ""
+    assert result.returncode == 0
+
+
+@pytest.mark.integ
+def test_llama3_quickstart():
+    result = subprocess.run(
+        [sys.executable, "models/meta/llama3/invoke_model_quickstart.py"],
+        capture_output=True,
+        text=True,
+    )
+    assert result.stdout != ""
+    assert result.returncode == 0
+
+
+@pytest.mark.integ
+def test_llama3_with_response_stream_quickstart():
+    result = subprocess.run(
+        [
+            sys.executable,
+            "models/meta/llama3/invoke_model_with_response_stream_quickstart.py",
+        ],
+        capture_output=True,
+        text=True,
+    )
+    assert result.stdout != ""
+    assert result.returncode == 0
diff --git a/python/example_code/bedrock-runtime/test/test_bedrock_runtime_wrapper.py b/python/example_code/bedrock-runtime/test/test_bedrock_runtime_wrapper.py
index 171e021d6ea..2be67b0547e 100644
--- a/python/example_code/bedrock-runtime/test/test_bedrock_runtime_wrapper.py
+++ b/python/example_code/bedrock-runtime/test/test_bedrock_runtime_wrapper.py
@@ -98,27 +98,6 @@ def test_invoke_jurassic2(make_stubber, error_code):
         assert exc_info.value.response["Error"]["Code"] == error_code
 
 
-@pytest.mark.parametrize("error_code", [None, "ClientError"])
-def test_invoke_llama2(make_stubber, error_code):
-    bedrock_runtime = boto3.client(
-        service_name="bedrock-runtime", region_name="us-east-1"
-    )
-    bedrock_runtime_stubber = make_stubber(bedrock_runtime)
-    wrapper = BedrockRuntimeWrapper(bedrock_runtime)
-
-    prompt = "Hey, how are you?"
-
-    bedrock_runtime_stubber.stub_invoke_llama2(prompt, error_code=error_code)
-
-    if error_code is None:
-        got_completion = wrapper.invoke_llama2(prompt)
-        assert len(got_completion) > 0
-    else:
-        with pytest.raises(ClientError) as exc_info:
-            wrapper.invoke_llama2(prompt)
-        assert exc_info.value.response["Error"]["Code"] == error_code
-
-
 @pytest.mark.asyncio
 @pytest.mark.parametrize("error_code", ["ClientError"])
 async def test_invoke_model_with_response_stream(make_stubber, error_code):
diff --git a/python/test_tools/bedrock_runtime_stubber.py b/python/test_tools/bedrock_runtime_stubber.py
index ed2af9a51d9..6ac95919308 100644
--- a/python/test_tools/bedrock_runtime_stubber.py
+++ b/python/test_tools/bedrock_runtime_stubber.py
@@ -118,24 +118,6 @@ def stub_invoke_jurassic2(self, prompt, error_code=None):
             "invoke_model", expected_params, response, error_code=error_code
         )
 
-    def stub_invoke_llama2(self, prompt, error_code=None):
-        expected_params = {
-            "modelId": "meta.llama2-13b-chat-v1",
-            "body": json.dumps(
-                {"prompt": prompt, "temperature": 0.5, "top_p": 0.9, "max_gen_len": 512}
-            ),
-        }
-
-        response_body = io.BytesIO(
-            json.dumps({"generation": "Fake completion response."}).encode("utf-8")
-        )
-
-        response = {"body": response_body, "contentType": ""}
-
-        self._stub_bifurcator(
-            "invoke_model", expected_params, response, error_code=error_code
-        )
-
     def stub_invoke_model_with_response_stream(self, prompt, error_code=None):
         expected_params = {
             "modelId": "anthropic.claude-v2",