Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Issue #2327] Configure API/backend to work with OpenSearch #2450

Merged
merged 16 commits into from
Oct 15, 2024
Merged
8 changes: 3 additions & 5 deletions .github/workflows/cd-api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ on:
push:
branches:
- "main"
- "chouinar/2327-api-opensearch"
paths:
- "api/**"
- ".github/**"
release:
types: [published]
workflow_dispatch:
Expand All @@ -22,18 +24,14 @@ on:
- prod

jobs:
api-checks:
name: Run API Checks
uses: ./.github/workflows/ci-api.yml

deploy:
name: Deploy
needs: api-checks
uses: ./.github/workflows/deploy.yml
strategy:
max-parallel: 1
matrix:
envs: ${{ github.event_name == 'release' && fromJSON('["prod"]') || github.ref_name == 'main' && fromJSON('["dev", "staging"]') || fromJSON('["dev"]') }}
envs: [ "dev" ]
with:
app_name: "api"
environment: ${{ matrix.envs }}
8 changes: 1 addition & 7 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,11 @@ jobs:
app_name: ${{ inputs.app_name }}
ref: ${{ github.ref }}

database-migrations:
name: Database migrations
uses: ./.github/workflows/database-migrations.yml
with:
app_name: ${{ inputs.app_name }}
environment: ${{ inputs.environment }}

deploy:
name: Deploy
runs-on: ubuntu-latest
needs: [database-migrations]
needs: [build-and-publish]
permissions:
contents: read
id-token: write
Expand Down
10 changes: 5 additions & 5 deletions api/local.env
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ HIDE_SQL_PARAMETER_LOGS=TRUE
# Opensearch Environment Variables
############################

OPENSEARCH_HOST=opensearch-node
OPENSEARCH_PORT=9200
OPENSEARCH_USE_SSL=FALSE
OPENSEARCH_VERIFY_CERTS=FALSE
SEARCH_ENDPOINT=opensearch-node
SEARCH_PORT=9200
SEARCH_USE_SSL=FALSE
SEARCH_VERIFY_CERTS=FALSE

############################
# AWS Defaults
Expand Down Expand Up @@ -126,4 +126,4 @@ IS_LOCAL_FOREIGN_TABLE=true
############################

# File path for the export_opportunity_data task
EXPORT_OPP_DATA_FILE_PATH=/tmp
EXPORT_OPP_DATA_FILE_PATH=/tmp
43 changes: 32 additions & 11 deletions api/src/adapters/search/opensearch_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import boto3
import opensearchpy
from requests.auth import HTTPBasicAuth

from src.adapters.search.opensearch_config import OpensearchConfig, get_opensearch_config
from src.adapters.search.opensearch_response import SearchResponse
Expand Down Expand Up @@ -252,25 +253,45 @@ def scroll(
self._client.clear_scroll(scroll_id=scroll_id)


class CustomAuth(opensearchpy.RequestsAWSV4SignerAuth):
    """Auth handler that adds username/password basic auth before AWS request signing.

    Extends ``opensearchpy.RequestsAWSV4SignerAuth``. Each outgoing request is
    first run through ``HTTPBasicAuth`` using the stored username and password,
    and the result is then handed to the inherited ``_sign_request``.
    """

    # TODO - better name once we see if this works
    def __init__(self, credentials, region, username: str, password: str, service: str = "es") -> None:  # type: ignore
        """Initialize the parent signer and remember the basic-auth credentials.

        ``credentials``, ``region`` and ``service`` are forwarded unchanged to the
        parent constructor; ``username`` and ``password`` are stored on the
        instance so they can be applied to every request.
        """
        super().__init__(credentials, region, service)

        self.username, self.password = username, password

    def __call__(self, request):  # type: ignore
        """Apply basic auth to ``request`` and return the result of ``_sign_request``."""
        # Delegate to HTTPBasicAuth's __call__ so the username+password
        # authorization header is added to the request.
        basic_auth = HTTPBasicAuth(self.username, self.password)
        request_with_basic_auth = basic_auth(request)

        # NOTE(review): confirm the inherited signing step keeps the basic-auth
        # Authorization header rather than replacing it with its own.
        return self._sign_request(request_with_basic_auth)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow this was easy 💯



def _get_connection_parameters(opensearch_config: OpensearchConfig) -> dict[str, Any]:
# See: https://opensearch.org/docs/latest/clients/python-low-level/#connecting-to-amazon-opensearch-serverless
# See: https://opensearch.org/docs/latest/clients/python-low-level/#connecting-to-opensearch
# for further details on configuring the connection to OpenSearch

params = dict(
hosts=[{"host": opensearch_config.host, "port": opensearch_config.port}],
hosts=[{"host": opensearch_config.search_endpoint, "port": opensearch_config.search_port}],
http_compress=True,
use_ssl=opensearch_config.use_ssl,
verify_certs=opensearch_config.verify_certs,
use_ssl=opensearch_config.search_use_ssl,
verify_certs=opensearch_config.search_verify_certs,
connection_class=opensearchpy.RequestsHttpConnection,
pool_maxsize=opensearch_config.connection_pool_size,
pool_maxsize=opensearch_config.search_connection_pool_size,
)

# If an AWS region is set, we assume we're running non-locally
# and will attempt to authenticate with AOSS
if opensearch_config.aws_region is not None:
# Get credentials and authorize with AWS Opensearch Serverless (aoss)
# We'll assume if the aws_region is set, we're running in AWS
# and should connect using the session credentials
if opensearch_config.search_username and opensearch_config.search_password:
# Get credentials and authorize with AWS Opensearch Serverless (es)
credentials = boto3.Session().get_credentials()
auth = opensearchpy.AWSV4SignerAuth(credentials, opensearch_config.aws_region, "aoss")
auth = CustomAuth(
credentials=credentials,
region=opensearch_config.aws_region,
username=opensearch_config.search_username,
password=opensearch_config.search_password,
)
params["http_auth"] = auth

return params
31 changes: 15 additions & 16 deletions api/src/adapters/search/opensearch_config.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
import logging

from pydantic import Field
from pydantic_settings import SettingsConfigDict

from src.util.env_config import PydanticBaseEnvConfig

logger = logging.getLogger(__name__)


class OpensearchConfig(PydanticBaseEnvConfig):
model_config = SettingsConfigDict(env_prefix="OPENSEARCH_")

# TODO - hacky fix to get the API working again, host/port should
# be defined in terraform env vars
host: str = Field(default="NOT_DEFINED") # OPENSEARCH_HOST
port: int = Field(default=1) # OPENSEARCH_PORT
use_ssl: bool = Field(default=True) # OPENSEARCH_USE_SSL
verify_certs: bool = Field(default=True) # OPENSEARCH_VERIFY_CERTS
connection_pool_size: int = Field(default=10) # OPENSEARCH_CONNECTION_POOL_SIZE
search_endpoint: str = Field(default="NOT_DEFINED") # SEARCH_ENDPOINT
search_port: int = Field(default=443) # SEARCH_PORT

# AWS configuration
aws_region: str | None = Field(default=None) # OPENSEARCH_AWS_REGION
search_username: str | None = Field(default=None) # SEARCH_USERNAME
search_password: str | None = Field(default=None) # SEARCH_PASSWORD

search_use_ssl: bool = Field(default=True) # SEARCH_USE_SSL
search_verify_certs: bool = Field(default=True) # SEARCH_VERIFY_CERTS
search_connection_pool_size: int = Field(default=10) # SEARCH_CONNECTION_POOL_SIZE

aws_region: str | None = Field(default=None)


def get_opensearch_config() -> OpensearchConfig:
Expand All @@ -29,11 +28,11 @@ def get_opensearch_config() -> OpensearchConfig:
logger.info(
"Constructed opensearch configuration",
extra={
"host": opensearch_config.host,
"port": opensearch_config.port,
"use_ssl": opensearch_config.use_ssl,
"verify_certs": opensearch_config.verify_certs,
"connection_pool_size": opensearch_config.connection_pool_size,
"search_endpoint": opensearch_config.search_endpoint,
"search_port": opensearch_config.search_port,
"search_use_ssl": opensearch_config.search_use_ssl,
"search_verify_certs": opensearch_config.search_verify_certs,
"search_connection_pool_size": opensearch_config.search_connection_pool_size,
},
)

Expand Down
2 changes: 1 addition & 1 deletion api/src/search/backend/load_opportunities_to_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def fetch_opportunities(self) -> Iterator[Sequence[Opportunity]]:
CurrentOpportunitySummary.opportunity_status.isnot(None),
)
.options(selectinload("*"), noload(Opportunity.all_opportunity_summaries))
.execution_options(yield_per=5000)
.execution_options(yield_per=1000)
)
.scalars()
.partitions()
Expand Down
8 changes: 5 additions & 3 deletions api/src/search/backend/load_search_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import src.adapters.db as db
import src.adapters.search as search
from src.adapters.db import flask_db
from src.adapters.search import flask_opensearch
from src.search.backend.load_opportunities_to_index import LoadOpportunitiesToIndex
from src.search.backend.load_search_data_blueprint import load_search_data_blueprint
from src.task.ecs_background_task import ecs_background_task
Expand All @@ -17,8 +18,9 @@
help="Whether to run a full refresh, or only incrementally update oppportunities",
)
@flask_db.with_db_session()
@flask_opensearch.with_search_client()
@ecs_background_task(task_name="load-opportunity-data-opensearch")
def load_opportunity_data(db_session: db.Session, full_refresh: bool) -> None:
search_client = search.SearchClient()

def load_opportunity_data(
search_client: search.SearchClient, db_session: db.Session, full_refresh: bool
) -> None:
LoadOpportunitiesToIndex(db_session, search_client, full_refresh).run()
2 changes: 1 addition & 1 deletion api/tests/src/adapters/search/test_opensearch_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def test_get_connection_parameters():

# Mostly validating defaults get used
assert params == {
"hosts": [{"host": config.host, "port": 9200}],
"hosts": [{"host": config.search_endpoint, "port": 9200}],
"http_compress": True,
"use_ssl": False,
"verify_certs": False,
Expand Down
Loading