opensearch-project · hmumtazz · Nov 15, 2024 · Nov 18, 2024 · Nov 18, 2024 · Nov 18, 2024
@@ -46,6 +46,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - updating listing file with three v2 sparse model - by @dhrubo-os ([#412](https://github.com/opensearch-project/opensearch-py-ml/pull/412))
 - Update model upload history -  opensearch-project/opensearch-neural-sparse-encoding-doc-v2-mini (v.1.0.0)(TORCH_SCRIPT) by @dhrubo-os ([#417](https://github.com/opensearch-project/opensearch-py-ml/pull/417))
 - Update model upload history -  opensearch-project/opensearch-neural-sparse-encoding-v2-distill (v.1.0.0)(TORCH_SCRIPT) by @dhrubo-os ([#419](https://github.com/opensearch-project/opensearch-py-ml/pull/419))
+- Added RAG functionality to `opensearch-py-ml` by @hmumtazz ([#427](https://github.com/opensearch-project/opensearch-py-ml/pull/427))
 
 ### Fixed
 - Fix the wrong final zip file name in model_uploader workflow, now will name it by the upload_prefix alse.([#413](https://github.com/opensearch-project/opensearch-py-ml/pull/413/files))

diff --git a/opensearch_py_ml/ml_commons/rag_pipeline/rag/IAMRoleHelper.py b/opensearch_py_ml/ml_commons/rag_pipeline/rag/IAMRoleHelper.py
@@ -0,0 +1,322 @@
+# SPDX-License-Identifier: Apache-2.0
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+# Any modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
+
+#  Licensed to Elasticsearch B.V. under one or more contributor
+#  license agreements. See the NOTICE file distributed with
+#  this work for additional information regarding copyright
+#  ownership. Elasticsearch B.V. licenses this file to you under
+#  the Apache License, Version 2.0 (the "License"); you may
+#  not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+# 	http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+import boto3
+import json
+from botocore.exceptions import ClientError
+import requests
+
+class IAMRoleHelper:
+    """
+    Helper for AWS IAM role management and OpenSearch role mapping.
+
+    IAM and STS are reached through boto3, the OpenSearch Security REST API
+    through requests. ``ClientError`` and request exceptions are caught and
+    printed; methods signal failure with ``None`` or ``False`` rather than
+    raising.
+    """
+
+    # Upper bound, in seconds, on the OpenSearch Security API call. Without a
+    # timeout, requests can wait indefinitely on an unresponsive endpoint.
+    REQUEST_TIMEOUT_SECONDS = 30
+
+    def __init__(self, region, opensearch_domain_url=None, opensearch_domain_username=None,
+                 opensearch_domain_password=None, aws_user_name=None, aws_role_name=None, opensearch_domain_arn=None):
+        """
+        Store the AWS and OpenSearch settings used by the helper.
+
+        :param region: AWS region name (stored; not read by this class).
+        :param opensearch_domain_url: Base URL of the OpenSearch domain; the
+            Security API path is appended to it.
+        :param opensearch_domain_username: User name for HTTP basic auth.
+        :param opensearch_domain_password: Password for HTTP basic auth.
+        :param aws_user_name: IAM user name (stored; not read by this class).
+        :param aws_role_name: IAM role name (stored; not read by this class).
+        :param opensearch_domain_arn: OpenSearch domain ARN (stored; not read
+            by this class).
+        """
+        self.region = region
+        self.opensearch_domain_url = opensearch_domain_url
+        self.opensearch_domain_username = opensearch_domain_username
+        self.opensearch_domain_password = opensearch_domain_password
+        self.aws_user_name = aws_user_name
+        self.aws_role_name = aws_role_name
+        self.opensearch_domain_arn = opensearch_domain_arn
+
+    def role_exists(self, role_name):
+        """
+        Check whether an IAM role exists.
+
+        :param role_name: Name of the IAM role to look up.
+        :return: True if the role was found. False if it does not exist or if
+            any other ``ClientError`` occurred (the error is printed), so
+            False is not proof that the role is absent.
+        """
+        iam_client = boto3.client('iam')
+
+        try:
+            iam_client.get_role(RoleName=role_name)
+            return True
+        except ClientError as e:
+            # A missing role is the expected negative; report anything else.
+            if e.response['Error']['Code'] != 'NoSuchEntity':
+                print(f"An error occurred: {e}")
+            return False
+
+    def delete_role(self, role_name):
+        """
+        Delete an IAM role together with its managed and inline policies.
+
+        Attached managed policies are detached and inline policies deleted
+        before the role itself is removed. Errors are printed, not raised.
+
+        :param role_name: Name of the IAM role to delete.
+        """
+        iam_client = boto3.client('iam')
+
+        try:
+            # Detach managed policies
+            policies = iam_client.list_attached_role_policies(RoleName=role_name)['AttachedPolicies']
+            for policy in policies:
+                iam_client.detach_role_policy(RoleName=role_name, PolicyArn=policy['PolicyArn'])
+            print(f'All managed policies detached from role {role_name}.')
+
+            # Delete inline policies
+            inline_policies = iam_client.list_role_policies(RoleName=role_name)['PolicyNames']
+            for policy_name in inline_policies:
+                iam_client.delete_role_policy(RoleName=role_name, PolicyName=policy_name)
+            print(f'All inline policies deleted from role {role_name}.')
+
+            # Now, delete the role
+            iam_client.delete_role(RoleName=role_name)
+            print(f'Role {role_name} deleted.')
+
+        except ClientError as e:
+            if e.response['Error']['Code'] == 'NoSuchEntity':
+                print(f'Role {role_name} does not exist.')
+            else:
+                print(f"An error occurred: {e}")
+
+    def create_iam_role(self, role_name, trust_policy_json, inline_policy_json):
+        """
+        Create an IAM role and attach one inline policy to it.
+
+        :param role_name: Name for the new role.
+        :param trust_policy_json: Trust policy as a JSON-serializable object;
+            it is passed through ``json.dumps``.
+        :param inline_policy_json: Inline policy as a JSON-serializable object,
+            stored on the role under the name ``InlinePolicy``.
+        :return: ARN of the created role, or None if a ``ClientError`` occurred.
+            If the role is created but attaching the policy fails, None is
+            returned and the new role is left in place.
+        """
+        iam_client = boto3.client('iam')
+
+        try:
+            # Create the role with the trust policy
+            create_role_response = iam_client.create_role(
+                RoleName=role_name,
+                AssumeRolePolicyDocument=json.dumps(trust_policy_json),
+                Description='Role with custom trust and inline policies',
+            )
+
+            # Get the ARN of the newly created role
+            role_arn = create_role_response['Role']['Arn']
+
+            # Attach the inline policy to the role
+            iam_client.put_role_policy(
+                RoleName=role_name,
+                PolicyName='InlinePolicy',  # you can replace this with your preferred policy name
+                PolicyDocument=json.dumps(inline_policy_json)
+            )
+
+            print(f'Created role: {role_name}')
+            return role_arn
+
+        except ClientError as e:
+            print(f"Error creating the role: {e}")
+            return None
+
+    def get_role_arn(self, role_name):
+        """
+        Look up the ARN of an IAM role.
+
+        :param role_name: Name of the IAM role; a falsy value returns None
+            without calling AWS.
+        :return: The role ARN, or None if the role is missing or the lookup
+            failed (a message is printed in both cases).
+        """
+        if not role_name:
+            return None
+        iam_client = boto3.client('iam')
+        try:
+            response = iam_client.get_role(RoleName=role_name)
+            return response['Role']['Arn']
+        except ClientError as e:
+            if e.response['Error']['Code'] == 'NoSuchEntity':
+                print(f"The requested role {role_name} does not exist")
+            else:
+                print(f"An error occurred: {e}")
+            return None
+
+    def get_role_details(self, role_name):
+        """
+        Print an IAM role's metadata, trust policy and inline policies.
+
+        :param role_name: Name of the IAM role to describe.
+        """
+        iam = boto3.client('iam')
+
+        try:
+            response = iam.get_role(RoleName=role_name)
+            role = response['Role']
+
+            print(f"Role Name: {role['RoleName']}")
+            print(f"Role ID: {role['RoleId']}")
+            print(f"ARN: {role['Arn']}")
+            print(f"Creation Date: {role['CreateDate']}")
+            print("Assume Role Policy Document:")
+            print(json.dumps(role['AssumeRolePolicyDocument'], indent=4, sort_keys=True))
+
+            list_role_policies_response = iam.list_role_policies(RoleName=role_name)
+
+            for policy_name in list_role_policies_response['PolicyNames']:
+                get_role_policy_response = iam.get_role_policy(RoleName=role_name, PolicyName=policy_name)
+                print(f"Role Policy Name: {get_role_policy_response['PolicyName']}")
+                print("Role Policy Document:")
+                print(json.dumps(get_role_policy_response['PolicyDocument'], indent=4, sort_keys=True))
+
+        except ClientError as e:
+            if e.response['Error']['Code'] == 'NoSuchEntity':
+                print(f'Role {role_name} does not exist.')
+            else:
+                print(f"An error occurred: {e}")
+
+    def get_user_arn(self, username):
+        """
+        Look up the ARN of an IAM user.
+
+        :param username: IAM user name; a falsy value returns None without
+            calling AWS.
+        :return: The user ARN, or None if the user is missing or the lookup
+            failed (a message is printed in both cases).
+        """
+        if not username:
+            return None
+        iam_client = boto3.client('iam')
+
+        try:
+            response = iam_client.get_user(UserName=username)
+            return response['User']['Arn']
+        except ClientError as e:
+            if e.response['Error']['Code'] == 'NoSuchEntity':
+                print(f"IAM user '{username}' not found.")
+            else:
+                print(f"An error occurred: {e}")
+            return None
+
+    def assume_role(self, role_arn, role_session_name="your_session_name"):
+        """
+        Assume an IAM role through STS and return its temporary credentials.
+
+        :param role_arn: ARN of the role to assume.
+        :param role_session_name: Session name passed to STS.
+        :return: The ``Credentials`` entry of the STS response, or None if a
+            ``ClientError`` occurred (the error is printed).
+        """
+        sts_client = boto3.client('sts')
+
+        try:
+            assumed_role_object = sts_client.assume_role(
+                RoleArn=role_arn,
+                RoleSessionName=role_session_name,
+            )
+
+            # Obtain the temporary credentials from the assumed role
+            return assumed_role_object["Credentials"]
+        except ClientError as e:
+            print(f"Error assuming role: {e}")
+            return None
+
+    def map_iam_role_to_backend_role(self, iam_role_arn):
+        """
+        Map an IAM role ARN to the OpenSearch ``ml_full_access`` security role.
+
+        Sends a PUT to the Security plugin's ``rolesmapping`` endpoint using
+        HTTP basic auth with the stored domain credentials. The outcome is
+        printed; nothing is returned, and request exceptions are not raised.
+
+        NOTE(review): the PUT body holds only this ARN, so it presumably
+        replaces any backend roles already mapped to ``ml_full_access`` -
+        confirm against the Security API before relying on existing mappings.
+
+        :param iam_role_arn: ARN of the IAM role to add as a backend role.
+        """
+        os_security_role = 'ml_full_access'  # Changed from 'all_access' to 'ml_full_access'
+        url = f'{self.opensearch_domain_url}/_plugins/_security/api/rolesmapping/{os_security_role}'
+
+        payload = {
+            "backend_roles": [iam_role_arn]
+        }
+        headers = {'Content-Type': 'application/json'}
+
+        try:
+            response = requests.put(
+                url,
+                auth=(self.opensearch_domain_username, self.opensearch_domain_password),
+                json=payload,
+                headers=headers,
+                verify=True,
+                # Bound the wait so an unreachable domain cannot hang the caller.
+                timeout=self.REQUEST_TIMEOUT_SECONDS,
+            )
+
+            # The endpoint answers 201 when it creates the mapping and 200 when
+            # it updates an existing one; both mean the mapping is in place.
+            if response.status_code in (200, 201):
+                print(f"Successfully mapped IAM role to OpenSearch role '{os_security_role}'.")
+            else:
+                print(f"Failed to map IAM role to OpenSearch role '{os_security_role}'. Status code: {response.status_code}")
+                print(f"Response: {response.text}")
+        except requests.exceptions.RequestException as e:
+            print(f"HTTP request failed: {e}")
+
+    def get_iam_user_name_from_arn(self, iam_principal_arn):
+        """
+        Extract the IAM user name from an IAM principal ARN.
+
+        IAM user ARN format: ``arn:aws:iam::123456789012:user/user-name``. The
+        ARN may carry a path before the name, for example
+        ``arn:aws:iam::123456789012:user/division_abc/Bob``; only the last
+        segment is the user name.
+
+        :param iam_principal_arn: ARN string, or None.
+        :return: The user name, or None if the value is empty or does not
+            contain ``:user/``.
+        """
+        if not iam_principal_arn or ':user/' not in iam_principal_arn:
+            return None
+        resource = iam_principal_arn.split(':user/')[-1]
+        # Drop any IAM path prefix; the user name is the final path segment.
+        return resource.rsplit('/', 1)[-1]
diff --git a/opensearch_py_ml/ml_commons/rag_pipeline/rag/SecretsHelper.py b/opensearch_py_ml/ml_commons/rag_pipeline/rag/SecretsHelper.py
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: Apache-2.0
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+# Any modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
+
+#  Licensed to Elasticsearch B.V. under one or more contributor
+#  license agreements. See the NOTICE file distributed with
+#  this work for additional information regarding copyright
+#  ownership. Elasticsearch B.V. licenses this file to you under
+#  the Apache License, Version 2.0 (the "License"); you may
+#  not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+# 	http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+import logging
+import boto3
+import json
+from botocore.exceptions import ClientError
+
+logger = logging.getLogger(__name__)
+
+class SecretHelper:
+    """
+    Helper for reading and creating AWS Secrets Manager secrets.
+
+    A new boto3 ``secretsmanager`` client is built for the configured region
+    on every call. ``ClientError`` is caught and logged; methods signal
+    failure with ``None`` or ``False`` rather than raising.
+    """
+
+    def __init__(self, region):
+        """
+        :param region: AWS region name used for every Secrets Manager client.
+        """
+        self.region = region
+
+    def _client(self):
+        """Return a Secrets Manager client bound to ``self.region``."""
+        return boto3.client('secretsmanager', region_name=self.region)
+
+    def secret_exists(self, secret_name):
+        """
+        Check whether a secret exists.
+
+        Uses ``describe_secret`` so that only metadata is requested; checking
+        existence does not need the secret value to be fetched.
+
+        :param secret_name: Name of the secret (passed as ``SecretId``).
+        :return: True if the secret was found. False if it does not exist or if
+            any other ``ClientError`` occurred (the error is logged), so False
+            is not proof that the secret is absent.
+        """
+        secretsmanager = self._client()
+        try:
+            secretsmanager.describe_secret(SecretId=secret_name)
+            return True
+        except ClientError as e:
+            # A missing secret is the expected negative; log anything else.
+            if e.response['Error']['Code'] != 'ResourceNotFoundException':
+                logger.error(f"An error occurred: {e}")
+            return False
+
+    def get_secret_arn(self, secret_name):
+        """
+        Look up the ARN of a secret.
+
+        :param secret_name: Name of the secret (passed as ``SecretId``).
+        :return: The secret ARN, or None if the secret is missing or the lookup
+            failed (logged in both cases).
+        """
+        secretsmanager = self._client()
+        try:
+            response = secretsmanager.describe_secret(SecretId=secret_name)
+            return response['ARN']
+        except ClientError as e:
+            if e.response['Error']['Code'] == 'ResourceNotFoundException':
+                logger.warning(f"The requested secret {secret_name} was not found")
+            else:
+                logger.error(f"An error occurred: {e}")
+            return None
+
+    def get_secret(self, secret_name):
+        """
+        Fetch the string value of a secret.
+
+        :param secret_name: Name of the secret (passed as ``SecretId``).
+        :return: The ``SecretString`` of the response (None when the response
+            has no such key), or None if the secret is missing or the lookup
+            failed (logged in both cases).
+        """
+        secretsmanager = self._client()
+        try:
+            response = secretsmanager.get_secret_value(SecretId=secret_name)
+            return response.get('SecretString')
+        except ClientError as e:
+            if e.response['Error']['Code'] == 'ResourceNotFoundException':
+                logger.warning("The requested secret was not found")
+            else:
+                logger.error(f"An error occurred: {e}")
+            return None
+
+    def create_secret(self, secret_name, secret_value):
+        """
+        Create a secret whose value is the JSON encoding of ``secret_value``.
+
+        :param secret_name: Name for the new secret.
+        :param secret_value: JSON-serializable object; it is passed through
+            ``json.dumps``, so a plain string is stored as a quoted JSON string.
+        :return: ARN of the created secret, or None if a ``ClientError``
+            occurred (the error is logged).
+        """
+        secretsmanager = self._client()
+        try:
+            response = secretsmanager.create_secret(
+                Name=secret_name,
+                SecretString=json.dumps(secret_value),
+            )
+            logger.info(f'Secret {secret_name} created successfully.')
+            return response['ARN']
+        except ClientError as e:
+            logger.error(f'Error creating secret: {e}')
+            return None