Skip to content

Commit

Permalink
Mergeback release-5.7 (#112)
Browse files Browse the repository at this point in the history
* Update to 5.7.0 (#95)

* Update version and submodule dependecy to 5.7.0

* Use tab instead

* use streamlined branch name

* Add missing urllib3 dependency (#99)

* Add missing dependency on urllib3 (#98)

* Add missing urllib3 dependency

* Do not use version >2

* Bump version

* Bump to 5.7.1

* DataSource Audit: pass run_id from env variable (#102)

* flight meta middleware for extra metadata

* iteration on comments + add run id to http requests

* moving run id to parameter input

* adding client source environment variable

* cleanup

* updating pyproject version (#109)

---------

Co-authored-by: ddl-gabrielhaim <[email protected]>
Co-authored-by: Gabriel Haim <[email protected]>
  • Loading branch information
3 people authored Sep 1, 2023
1 parent 18e8b15 commit 8596f4b
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 41 deletions.
7 changes: 7 additions & 0 deletions domino_data/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,16 @@ def get_headers(self) -> Dict[str, str]:
class ProxyClient(AuthenticatedClient):
"""A client that authenticates all requests but with Proxy headers."""

client_source: Optional[str] = attr.ib()
run_id: Optional[str] = attr.ib()

def get_headers(self) -> Dict[str, str]:
if self.api_key:
self.headers["X-Domino-Api-Key"] = self.api_key
if self.client_source:
self.headers["X-Domino-Client-Source"] = self.client_source
if self.run_id:
self.headers["X-Domino-Run-Id"] = self.run_id

if self.token_url is not None:
try:
Expand Down
37 changes: 27 additions & 10 deletions domino_data/data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from .auth import AuthenticatedClient, AuthMiddlewareFactory, ProxyClient, get_jwt_token
from .configuration_gen import Config, CredElem, DatasourceConfig, find_datasource_klass
from .logging import logger
from .meta import MetaMiddlewareFactory
from .transfer import MAX_WORKERS, BlobTransfer

ACCEPT_HEADERS = {"Accept": "application/json"}
Expand All @@ -43,6 +44,14 @@

AWS_CREDENTIALS_DEFAULT_LOCATION = "/var/lib/domino/home/.aws/credentials"
AWS_SHARED_CREDENTIALS_FILE = "AWS_SHARED_CREDENTIALS_FILE"
DOMINO_API_HOST = "DOMINO_API_HOST"
DOMINO_API_PROXY = "DOMINO_API_PROXY"
DOMINO_CLIENT_SOURCE = "DOMINO_CLIENT_SOURCE"
DOMINO_DATASOURCE_PROXY_HOST = "DOMINO_DATASOURCE_PROXY_HOST"
DOMINO_DATASOURCE_PROXY_FLIGHT_HOST = "DOMINO_DATASOURCE_PROXY_FLIGHT_HOST"
DOMINO_RUN_ID = "DOMINO_RUN_ID"
DOMINO_USER_API_KEY = "DOMINO_USER_API_KEY"
DOMINO_USER_HOST = "DOMINO_USER_HOST"
DOMINO_TOKEN_DEFAULT_LOCATION = "/var/lib/domino/home/.api/token"
DOMINO_TOKEN_FILE = "DOMINO_TOKEN_FILE"

Expand Down Expand Up @@ -245,7 +254,7 @@ def load_oauth_credentials() -> Dict[str, str]:
Raises:
DominoError: if the provided location is not a valid file
"""
token_url = os.getenv("DOMINO_API_PROXY", "")
token_url = os.getenv(DOMINO_API_PROXY, "")
if token_url:
try:
jwt = get_jwt_token(token_url)
Expand Down Expand Up @@ -561,16 +570,19 @@ class DataSourceClient:
proxy: flight.FlightClient = attr.ib(init=False, repr=False)
proxy_http: AuthenticatedClient = attr.ib(init=False, repr=False)

api_key: Optional[str] = attr.ib(factory=lambda: os.getenv("DOMINO_USER_API_KEY"))
token_file: Optional[str] = attr.ib(factory=lambda: os.getenv("DOMINO_TOKEN_FILE"))
token_url: Optional[str] = attr.ib(factory=lambda: os.getenv("DOMINO_API_PROXY"))
api_key: Optional[str] = attr.ib(factory=lambda: os.getenv(DOMINO_USER_API_KEY))
token_file: Optional[str] = attr.ib(factory=lambda: os.getenv(DOMINO_TOKEN_FILE))
token_url: Optional[str] = attr.ib(factory=lambda: os.getenv(DOMINO_API_PROXY))

def __attrs_post_init__(self):
flight_host = os.getenv("DOMINO_DATASOURCE_PROXY_FLIGHT_HOST")
flight_host = os.getenv(DOMINO_DATASOURCE_PROXY_FLIGHT_HOST)
domino_host = os.getenv(
"DOMINO_API_PROXY", os.getenv("DOMINO_API_HOST", os.getenv("DOMINO_USER_HOST", ""))
DOMINO_API_PROXY, os.getenv(DOMINO_API_HOST, os.getenv(DOMINO_USER_HOST, ""))
)
proxy_host = os.getenv("DOMINO_DATASOURCE_PROXY_HOST", "")
proxy_host = os.getenv(DOMINO_DATASOURCE_PROXY_HOST, "")

client_source = os.getenv(DOMINO_CLIENT_SOURCE, "Python")
run_id = os.getenv(DOMINO_RUN_ID, "")

logger.info(
"initializing datasource client with hosts",
Expand All @@ -583,6 +595,8 @@ def __attrs_post_init__(self):
self.proxy_http = ProxyClient(
base_url=proxy_host,
api_key=self.api_key,
client_source=client_source,
run_id=run_id,
token_file=self.token_file,
token_url=self.token_url,
timeout=5.0,
Expand All @@ -599,15 +613,18 @@ def __attrs_post_init__(self):
)

def _set_proxy(self):
flight_host = os.getenv("DOMINO_DATASOURCE_PROXY_FLIGHT_HOST")
client_source = os.getenv(DOMINO_CLIENT_SOURCE, "Python")
flight_host = os.getenv(DOMINO_DATASOURCE_PROXY_FLIGHT_HOST)
run_id = os.getenv(DOMINO_RUN_ID, "")
self.proxy = flight.FlightClient(
flight_host,
middleware=[
AuthMiddlewareFactory(
self.api_key,
self.token_file,
self.token_url,
)
),
MetaMiddlewareFactory(client_source=client_source, run_id=run_id),
],
)

Expand All @@ -625,7 +642,7 @@ def get_datasource(self, name: str) -> Datasource:
"""
logger.info("get_datasource", datasource_name=name)

run_id = os.getenv("DOMINO_RUN_ID")
run_id = os.getenv(DOMINO_RUN_ID)
response = get_datasource_by_name.sync_detailed(
name,
client=self.domino,
Expand Down
43 changes: 43 additions & 0 deletions domino_data/meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
""" Classes to augment headers with metadata for HTTP and Flight requests."""

from typing import Optional

import attr
from pyarrow import flight


@attr.s(auto_attribs=True)
class MetaMiddlewareFactory(flight.ClientMiddlewareFactory):
"""Middleware Factory for metadata."""

client_source: Optional[str] = attr.ib()
run_id: Optional[str] = attr.ib()

def start_call(self, _):
"""Called at the start of an RPC."""
return MetaMiddleware(self.client_source, self.run_id)


@attr.s(auto_attribs=True)
class MetaMiddleware(flight.ClientMiddleware):
"""Middleware for authenticating flight requests."""

client_source: Optional[str] = attr.ib()
run_id: Optional[str] = attr.ib()

def call_completed(self, _):
"""No implementation. TODO logging."""

def received_headers(self, _):
"""No implementation."""

def sending_headers(self):
"""Return metadata headers."""
headers = {}

if self.client_source is not None:
headers["x-domino-client-source"] = self.client_source
if self.run_id is not None:
headers["x-domino-run-id"] = self.run_id

return headers
Loading

0 comments on commit 8596f4b

Please sign in to comment.