From af61b9df5f05c43ff37b34fd8129f38e6d7a22c4 Mon Sep 17 00:00:00 2001 From: tkykenmt Date: Tue, 3 Dec 2024 14:34:25 +0900 Subject: [PATCH 1/2] add tutorials for cross encoder models on Amazon Bedrock Signed-off-by: tkykenmt --- ...h_Amazon_Rerank_model_on_Amazon_Bedrock.md | 622 +++++++++++++++++ ...h_Cohere_Rerank_model_on_Amazon_Bedrock.md | 625 ++++++++++++++++++ 2 files changed, 1247 insertions(+) create mode 100644 docs/tutorials/rerank/rerank_pipeline_with_Amazon_Rerank_model_on_Amazon_Bedrock.md create mode 100644 docs/tutorials/rerank/rerank_pipeline_with_Cohere_Rerank_model_on_Amazon_Bedrock.md diff --git a/docs/tutorials/rerank/rerank_pipeline_with_Amazon_Rerank_model_on_Amazon_Bedrock.md b/docs/tutorials/rerank/rerank_pipeline_with_Amazon_Rerank_model_on_Amazon_Bedrock.md new file mode 100644 index 0000000000..8036ea23d1 --- /dev/null +++ b/docs/tutorials/rerank/rerank_pipeline_with_Amazon_Rerank_model_on_Amazon_Bedrock.md @@ -0,0 +1,622 @@ +# Topic + +[Reranking pipeline](https://opensearch.org/docs/latest/search-plugins/search-relevance/reranking-search-results/) is a feature released in OpenSearch 2.12. +It can rerank search results, providing a relevance score for each document in the search results with respect to the search query. +The relevance score is calculated by a cross-encoder model. + +This tutorial explains how to use the [Amazon Rerank 1.0 model in Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/rerank-supported.html) in a reranking pipeline. + +Note: Replace the placeholders that start with `your_` with your own values. + +# Steps + +## 0. Test the model on Amazon Bedrock +You can perform a reranking test with the following code. 
+ +```python +import json +import boto3 +bedrock_region = "your_bedrock_model_region_like_us-west-2" +bedrock_runtime_client = boto3.client("bedrock-runtime", region_name=bedrock_region) + +modelId = "amazon.rerank-v1:0" +contentType = "application/json" +accept = "*/*" + +body = json.dumps({ + "query": "What is the capital city of America?", + "documents": [ + "Carson City is the capital city of the American state of Nevada.", + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", + "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states." + ] +}) + +response = bedrock_runtime_client.invoke_model( + modelId=modelId, + contentType=contentType, + accept=accept, + body=body +) +results = json.loads(response.get('body').read())["results"] +print(json.dumps(sorted(results, key=lambda x: x['index']),indent=2)) +``` + +The reranking results are as follows: + +``` +[ + { + "index": 0, + "relevance_score": 0.0025114635138098534 + }, + { + "index": 1, + "relevance_score": 2.4876490010363496e-05 + }, + { + "index": 2, + "relevance_score": 0.7711548724998493 + }, + { + "index": 3, + "relevance_score": 6.339210403977635e-06 + } +] +``` + +## 1. Create a connector and register the model + +To create a connector for the model, send the following request. 
If you are using self-managed OpenSearch, supply your AWS credentials: +```json +POST /_plugins/_ml/connectors/_create +{ + "name": "Amazon Bedrock cross-encoder model", + "description": "Test connector for Amazon Bedrock cross-encoder model", + "version": 1, + "protocol": "aws_sigv4", + "credential": { + "access_key": "your_access_key", + "secret_key": "your_secret_key", + "session_token": "your_session_token" + }, + "parameters": { + "region": "your_bedrock_model_region_like_us-west-2", + "service_name": "bedrock" + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/model/amazon.rerank-v1:0/invoke", + "headers": { + "x-amz-content-sha256": "required", + "content-type": "application/json" + }, + "pre_process_function": """ + def query_text = params.query_text; + def text_docs = params.text_docs; + def textDocsBuilder = new StringBuilder('['); + for (int i=0; i"], + "post_tags": [""], + "fields": {"passage_text": {}} + }, + "_source": false, + "fields": ["passage_text"] +} +``` + +The first document in the response is `Carson City is the capital city of the American state of Nevada`, which is incorrect: +```json +{ + "took": 2, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 2.5045562, + "hits": [ + { + "_index": "my-test-data", + "_id": "1", + "_score": 2.5045562, + "fields": { + "passage_text": [ + "Carson City is the capital city of the American state of Nevada." + ] + }, + "highlight": { + "passage_text": [ + "Carson City is the capital city of the American state of Nevada." + ] + } + }, + { + "_index": "my-test-data", + "_id": "2", + "_score": 0.5807494, + "fields": { + "passage_text": [ + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan." 
+ ] + }, + "highlight": { + "passage_text": [ + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean.", + "Its capital is Saipan." + ] + } + }, + { + "_index": "my-test-data", + "_id": "3", + "_score": 0.5261191, + "fields": { + "passage_text": [ + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district." + ] + }, + "highlight": { + "passage_text": [ + "(also known as simply Washington or D.C., and officially as the District of Columbia) is the capital", + "of the United States.", + "It is a federal district." + ] + } + }, + { + "_index": "my-test-data", + "_id": "4", + "_score": 0.5083029, + "fields": { + "passage_text": [ + "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states." + ] + }, + "highlight": { + "passage_text": [ + "Capital punishment (the death penalty) has existed in the United States since beforethe United States", + "As of 2017, capital punishment is legal in 30 of the 50 states." + ] + } + } + ] + } +} +``` + +Next, test the query using the reranking pipeline: +```json +POST my-test-data/_search?search_pipeline=rerank_pipeline_sagemaker +{ + "query": { + "match": { + "passage_text": "What is the capital city of America?" + } + }, + "ext": { + "rerank": { + "query_context": { + "query_text": "What is the capital city of America?" + } + } + }, + "highlight": { + "pre_tags": [""], + "post_tags": [""], + "fields": {"passage_text": {}} + }, + "_source": false, + "fields": ["passage_text"] +} +``` + +The first document in the response is `"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. 
It is a federal district."`, which is correct: + +```json +{ + "took": 2, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 0.7711549, + "hits": [ + { + "_index": "my-test-data", + "_id": "3", + "_score": 0.7711549, + "fields": { + "passage_text": [ + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district." + ] + }, + "highlight": { + "passage_text": [ + "(also known as simply Washington or D.C., and officially as the District of Columbia) is the capital", + "of the United States.", + "It is a federal district." + ] + } + }, + { + "_index": "my-test-data", + "_id": "1", + "_score": 0.0025114636, + "fields": { + "passage_text": [ + "Carson City is the capital city of the American state of Nevada." + ] + }, + "highlight": { + "passage_text": [ + "Carson City is the capital city of the American state of Nevada." + ] + } + }, + { + "_index": "my-test-data", + "_id": "2", + "_score": 2.487649e-05, + "fields": { + "passage_text": [ + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan." 
+ ] + }, + "highlight": { + "passage_text": [ + "Capital punishment (the death penalty) has existed in the United States since beforethe United States", + "As of 2017, capital punishment is legal in 30 of the 50 states." + ] + } + } + ] + }, + "profile": { + "shards": [] + } +} +``` + +Note: You can avoid writing the query twice by using query_text_path instead of query_text, as follows: +```json +POST my-test-data/_search?search_pipeline=rerank_pipeline_sagemaker +{ + "query": { + "match": { + "passage_text": "What is the capital city of America?" + } + }, + "ext": { + "rerank": { + "query_context": { + "query_text_path": "query.match.passage_text.query" + } + } + }, + "highlight": { + "pre_tags": [""], + "post_tags": [""], + "fields": {"passage_text": {}} + }, + "_source": false, + "fields": ["passage_text"] +} +``` diff --git a/docs/tutorials/rerank/rerank_pipeline_with_Cohere_Rerank_model_on_Amazon_Bedrock.md b/docs/tutorials/rerank/rerank_pipeline_with_Cohere_Rerank_model_on_Amazon_Bedrock.md new file mode 100644 index 0000000000..2cc62d74eb --- /dev/null +++ b/docs/tutorials/rerank/rerank_pipeline_with_Cohere_Rerank_model_on_Amazon_Bedrock.md @@ -0,0 +1,625 @@ +# Topic + +[Reranking pipeline](https://opensearch.org/docs/latest/search-plugins/search-relevance/reranking-search-results/) is a feature released in OpenSearch 2.12. +It can rerank search results, providing a relevance score for each document in the search results with respect to the search query. +The relevance score is calculated by a cross-encoder model. + +This tutorial explains how to use the [Cohere Rerank 3.5 model in Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/rerank-supported.html) in a reranking pipeline. + +Note: Replace the placeholders that start with `your_` with your own values. + +# Steps + +## 0. Test the model on Amazon Bedrock +You can perform a reranking test with the following code. 
+ +```python +import json +import boto3 +bedrock_region = "your_bedrock_model_region_like_us-west-2" +bedrock_runtime_client = boto3.client("bedrock-runtime", region_name=bedrock_region) + +modelId = "cohere.rerank-v3-5:0" +contentType = "application/json" +accept = "*/*" + +body = json.dumps({ + "query": "What is the capital city of America?", + "documents": [ + "Carson City is the capital city of the American state of Nevada.", + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", + "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states." + ], + "api_version": 2 +}) + +response = bedrock_runtime_client.invoke_model( + modelId=modelId, + contentType=contentType, + accept=accept, + body=body +) +results = json.loads(response.get('body').read())["results"] +print(json.dumps(sorted(results, key=lambda x: x['index']),indent=2)) +``` + +The reranking results are as follows: + +``` +[ + { + "index": 0, + "relevance_score": 0.32418242 + }, + { + "index": 1, + "relevance_score": 0.07456104 + }, + { + "index": 2, + "relevance_score": 0.7190094 + }, + { + "index": 3, + "relevance_score": 0.06124987 + } +] +``` + +## 1. Create a connector and register the model + +To create a connector for the model, send the following request. 
If you are using self-managed OpenSearch, supply your AWS credentials: +```json +POST /_plugins/_ml/connectors/_create +{ + "name": "Amazon Bedrock cross-encoder model", + "description": "Test connector for Amazon Bedrock cross-encoder model", + "version": 1, + "protocol": "aws_sigv4", + "credential": { + "access_key": "your_access_key", + "secret_key": "your_secret_key", + "session_token": "your_session_token" + }, + "parameters": { + "region": "your_bedrock_model_region_like_us-west-2", + "service_name": "bedrock", + "api_version": 2 + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "url": "https://bedrock-runtime.us-west-2.amazonaws.com/model/cohere.rerank-v3-5:0/invoke", + "headers": { + "x-amz-content-sha256": "required", + "content-type": "application/json" + }, + "pre_process_function": """ + def query_text = params.query_text; + def text_docs = params.text_docs; + def textDocsBuilder = new StringBuilder('['); + for (int i=0; i"], + "post_tags": [""], + "fields": {"passage_text": {}} + }, + "_source": false, + "fields": ["passage_text"] +} +``` + +The first document in the response is `Carson City is the capital city of the American state of Nevada`, which is incorrect: +```json +{ + "took": 2, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 2.5045562, + "hits": [ + { + "_index": "my-test-data", + "_id": "1", + "_score": 2.5045562, + "fields": { + "passage_text": [ + "Carson City is the capital city of the American state of Nevada." + ] + }, + "highlight": { + "passage_text": [ + "Carson City is the capital city of the American state of Nevada." + ] + } + }, + { + "_index": "my-test-data", + "_id": "2", + "_score": 0.5807494, + "fields": { + "passage_text": [ + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan." 
+ ] + }, + "highlight": { + "passage_text": [ + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean.", + "Its capital is Saipan." + ] + } + }, + { + "_index": "my-test-data", + "_id": "3", + "_score": 0.5261191, + "fields": { + "passage_text": [ + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district." + ] + }, + "highlight": { + "passage_text": [ + "(also known as simply Washington or D.C., and officially as the District of Columbia) is the capital", + "of the United States.", + "It is a federal district." + ] + } + }, + { + "_index": "my-test-data", + "_id": "4", + "_score": 0.5083029, + "fields": { + "passage_text": [ + "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states." + ] + }, + "highlight": { + "passage_text": [ + "Capital punishment (the death penalty) has existed in the United States since beforethe United States", + "As of 2017, capital punishment is legal in 30 of the 50 states." + ] + } + } + ] + } +} +``` + +Next, test the query using the reranking pipeline: +```json +POST my-test-data/_search?search_pipeline=rerank_pipeline_sagemaker +{ + "query": { + "match": { + "passage_text": "What is the capital city of America?" + } + }, + "ext": { + "rerank": { + "query_context": { + "query_text": "What is the capital city of America?" + } + } + }, + "highlight": { + "pre_tags": [""], + "post_tags": [""], + "fields": {"passage_text": {}} + }, + "_source": false, + "fields": ["passage_text"] +} +``` + +The first document in the response is `"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. 
It is a federal district."`, which is correct: + +```json +{ + "took": 2, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 0.7190094, + "hits": [ + { + "_index": "my-test-data", + "_id": "3", + "_score": 0.7190094, + "fields": { + "passage_text": [ + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district." + ] + }, + "highlight": { + "passage_text": [ + "(also known as simply Washington or D.C., and officially as the District of Columbia) is the capital", + "of the United States.", + "It is a federal district." + ] + } + }, + { + "_index": "my-test-data", + "_id": "1", + "_score": 0.32418242, + "fields": { + "passage_text": [ + "Carson City is the capital city of the American state of Nevada." + ] + }, + "highlight": { + "passage_text": [ + "Carson City is the capital city of the American state of Nevada." + ] + } + }, + { + "_index": "my-test-data", + "_id": "2", + "_score": 0.07456104, + "fields": { + "passage_text": [ + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan." + ] + }, + "highlight": { + "passage_text": [ + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean.", + "Its capital is Saipan." + ] + } + }, + { + "_index": "my-test-data", + "_id": "4", + "_score": 0.06124987, + "fields": { + "passage_text": [ + "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states." + ] + }, + "highlight": { + "passage_text": [ + "Capital punishment (the death penalty) has existed in the United States since beforethe United States", + "As of 2017, capital punishment is legal in 30 of the 50 states." 
+ ] + } + } + ] + }, + "profile": { + "shards": [] + } +} +``` + +Note: You can avoid writing the query twice by using query_text_path instead of query_text, as follows: +```json +POST my-test-data/_search?search_pipeline=rerank_pipeline_sagemaker +{ + "query": { + "match": { + "passage_text": "What is the capital city of America?" + } + }, + "ext": { + "rerank": { + "query_context": { + "query_text_path": "query.match.passage_text.query" + } + } + }, + "highlight": { + "pre_tags": [""], + "post_tags": [""], + "fields": {"passage_text": {}} + }, + "_source": false, + "fields": ["passage_text"] +} +``` From ecef600b235af7ce31fef17f9d57b5e814f88e60 Mon Sep 17 00:00:00 2001 From: tkykenmt Date: Tue, 3 Dec 2024 14:52:39 +0900 Subject: [PATCH 2/2] fix bedrock url to reflect region parameter Signed-off-by: tkykenmt --- ...ank_pipeline_with_Amazon_Rerank_model_on_Amazon_Bedrock.md | 4 ++-- ...ank_pipeline_with_Cohere_Rerank_model_on_Amazon_Bedrock.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/tutorials/rerank/rerank_pipeline_with_Amazon_Rerank_model_on_Amazon_Bedrock.md b/docs/tutorials/rerank/rerank_pipeline_with_Amazon_Rerank_model_on_Amazon_Bedrock.md index 8036ea23d1..d1434fe011 100644 --- a/docs/tutorials/rerank/rerank_pipeline_with_Amazon_Rerank_model_on_Amazon_Bedrock.md +++ b/docs/tutorials/rerank/rerank_pipeline_with_Amazon_Rerank_model_on_Amazon_Bedrock.md @@ -89,7 +89,7 @@ POST /_plugins/_ml/connectors/_create { "action_type": "predict", "method": "POST", - "url": "https://bedrock-runtime.us-west-2.amazonaws.com/model/amazon.rerank-v1:0/invoke", + "url": "https://bedrock-runtime.${parameters.region}.amazonaws.com/model/amazon.rerank-v1:0/invoke", "headers": { "x-amz-content-sha256": "required", "content-type": "application/json" @@ -159,7 +159,7 @@ POST /_plugins/_ml/connectors/_create { "action_type": "predict", "method": "POST", - "url": "https://bedrock-runtime.us-west-2.amazonaws.com/model/amazon.rerank-v1:0/invoke", + 
"url": "https://bedrock-runtime.${parameters.region}.amazonaws.com/model/amazon.rerank-v1:0/invoke", "headers": { "x-amz-content-sha256": "required", "content-type": "application/json" diff --git a/docs/tutorials/rerank/rerank_pipeline_with_Cohere_Rerank_model_on_Amazon_Bedrock.md b/docs/tutorials/rerank/rerank_pipeline_with_Cohere_Rerank_model_on_Amazon_Bedrock.md index 2cc62d74eb..abe34f2b9e 100644 --- a/docs/tutorials/rerank/rerank_pipeline_with_Cohere_Rerank_model_on_Amazon_Bedrock.md +++ b/docs/tutorials/rerank/rerank_pipeline_with_Cohere_Rerank_model_on_Amazon_Bedrock.md @@ -91,7 +91,7 @@ POST /_plugins/_ml/connectors/_create { "action_type": "predict", "method": "POST", - "url": "https://bedrock-runtime.us-west-2.amazonaws.com/model/cohere.rerank-v3-5:0/invoke", + "url": "https://bedrock-runtime.${parameters.region}.amazonaws.com/model/cohere.rerank-v3-5:0/invoke", "headers": { "x-amz-content-sha256": "required", "content-type": "application/json" @@ -162,7 +162,7 @@ POST /_plugins/_ml/connectors/_create { "action_type": "predict", "method": "POST", - "url": "https://bedrock-runtime.us-west-2.amazonaws.com/model/cohere.rerank-v3-5:0/invoke", + "url": "https://bedrock-runtime.${parameters.region}.amazonaws.com/model/cohere.rerank-v3-5:0/invoke", "headers": { "x-amz-content-sha256": "required", "content-type": "application/json"