diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 51fe72c6..f2860dcf 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -14,7 +14,7 @@ jobs: - uses: TrueBrain/actions-flake8@v2 with: flake8_version: 6.0.0 - plugins: flake8-isort~=6.0 + plugins: flake8-isort==6.1.1 flake8-quotes==3.4.0 requirements: runs-on: ubuntu-latest diff --git a/setup.cfg b/setup.cfg index 2b124160..45a2434c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,3 +6,4 @@ skip_gitignore = true [flake8] max-line-length = 120 +inline-quotes = double diff --git a/tea-cli/tea_cli/deploy.py b/tea-cli/tea_cli/deploy.py index 02873d65..1eb23079 100644 --- a/tea-cli/tea_cli/deploy.py +++ b/tea-cli/tea_cli/deploy.py @@ -32,7 +32,7 @@ def deploy(self, session: boto3.Session): client.create_bucket( Bucket=self.name, CreateBucketConfiguration={ - 'LocationConstraint': session.region_name + "LocationConstraint": session.region_name } ) diff --git a/tests_e2e/conftest.py b/tests_e2e/conftest.py index 27026356..5b16f365 100644 --- a/tests_e2e/conftest.py +++ b/tests_e2e/conftest.py @@ -152,8 +152,8 @@ def _boto3_api_host(stack_name): client = boto3.client("apigateway") rest_apis = client.get_rest_apis() - for api in rest_apis['items']: - if api['name'] == f"{stack_name}-EgressGateway": + for api in rest_apis["items"]: + if api["name"] == f"{stack_name}-EgressGateway": return f"{api['id']}.execute-api.{client.meta.region_name}.amazonaws.com" raise Exception(f"Could not find API for the given stackname {stack_name}") @@ -294,4 +294,4 @@ def pytest_sessionfinish(session, exitstatus): } # Dump results to S3. - boto3.resource('s3').Object(s3_bucket, s3_object).put(Body=testresults, **put_args) + boto3.resource("s3").Object(s3_bucket, s3_object).put(Body=testresults, **put_args) diff --git a/tests_e2e/test_jwt_blacklist.py b/tests_e2e/test_jwt_blacklist.py index f035c715..39181f75 100644 --- a/tests_e2e/test_jwt_blacklist.py +++ b/tests_e2e/test_jwt_blacklist.py @@ -66,8 +66,8 @@ def test_validate_invalid_jwt(urls, auth_cookies, endpoint_patcher): r = requests.get(url, cookies=auth_cookies, allow_redirects=False) assert r.is_redirect is True - assert r.headers['Location'] is not None - assert 'oauth/authorize' in r.headers['Location'] + assert r.headers["Location"] is not None + assert "oauth/authorize" in r.headers["Location"] def test_validate_valid_jwt(urls, auth_cookies, endpoint_patcher): diff --git a/tests_e2e/test_protected.py b/tests_e2e/test_protected.py index 833205b9..c0ccd34a 100644 --- a/tests_e2e/test_protected.py +++ b/tests_e2e/test_protected.py @@ -16,12 +16,12 @@ def test_urs_auth_redirect_for_auth_downloads(urls, auth_cookies, urs_username): assert r.status_code == 303 assert r.is_redirect is True - assert r.headers['Location'] is not None + assert r.headers["Location"] is not None query_params = urllib.parse.parse_qs( - urllib.parse.urlparse(r.headers['Location']).query + urllib.parse.urlparse(r.headers["Location"]).query ) - assert query_params['A-userid'] == [urs_username] - assert 'oauth/authorize' not in r.headers['Location'] + assert query_params["A-userid"] == [urs_username] + assert "oauth/authorize" not in r.headers["Location"] def test_origin_request_header(urls, auth_cookies): @@ -32,7 +32,7 @@ def test_origin_request_header(urls, auth_cookies): r = requests.get(url, cookies=auth_cookies, headers=headers, allow_redirects=False) headers = dict(r.headers) - assert headers.get('x-origin-request-id') == origin_request_value + assert headers.get("x-origin-request-id") == origin_request_value @pytest.mark.parametrize("method", ("get", "head")) @@ -80,7 +80,7 @@ def test_validate_custom_headers(urls, auth_cookies): r = requests.get(url, cookies=auth_cookies, allow_redirects=False) headers = dict(r.headers) - assert headers.get('x-rainheader1') is not None + assert headers.get("x-rainheader1") is not None def test_validate_locate_handles_complex_configuration_key(api_url, auth_cookies): @@ -94,15 +94,15 @@ def test_validate_locate_handles_complex_configuration_key(api_url, auth_cookies def find_bearer_token(auth_cookies): for cookie in auth_cookies: - if cookie.name == 'asf-urs': + if cookie.name == "asf-urs": # Grab the JWT payload: cookie_b64 = cookie.value.split(".")[1] # Fix the padding: - cookie_b64 += '=' * (4 - (len(cookie_b64) % 4)) + cookie_b64 += "=" * (4 - (len(cookie_b64) % 4)) # Decode & Load... cookie_json = json.loads(base64.b64decode(cookie_b64)) - if 'urs-access-token' in cookie_json: - return cookie_json['urs-access-token'] + if "urs-access-token" in cookie_json: + return cookie_json["urs-access-token"] return None diff --git a/thin_egress_app/app.py b/thin_egress_app/app.py index 7faa46cf..ee8216a7 100644 --- a/thin_egress_app/app.py +++ b/thin_egress_app/app.py @@ -82,55 +82,55 @@ def wrapper(*args, **kwargs): log = get_log() -conf_bucket = os.getenv('CONFIG_BUCKET', 'rain-t-config') -template_dir = os.getenv('HTML_TEMPLATE_DIR') +conf_bucket = os.getenv("CONFIG_BUCKET", "rain-t-config") +template_dir = os.getenv("HTML_TEMPLATE_DIR") # Here's a lifetime-of lambda cache of these values: -bucket_map_file = os.getenv('BUCKET_MAP_FILE', 'bucket_map.yaml') +bucket_map_file = os.getenv("BUCKET_MAP_FILE", "bucket_map.yaml") b_map = None # TODO(reweeden): Refactor when wrapped attributes are implemented # https://github.com/tkem/cachetools/issues/176 get_bucket_region_cache = cachetools.LRUCache(maxsize=128) -STAGE = os.getenv('STAGE_NAME', 'DEV') +STAGE = os.getenv("STAGE_NAME", "DEV") -JWT_COOKIE_NAME = 'asf-urs' +JWT_COOKIE_NAME = "asf-urs" JWT_MANAGER = JwtManager( - algorithm=os.getenv('JWT_ALGO', 'RS256'), + algorithm=os.getenv("JWT_ALGO", "RS256"), public_key=None, private_key=None, - cookie_name=os.getenv('JWT_COOKIENAME', JWT_COOKIE_NAME), - session_ttl_in_hours=float(os.getenv('SESSION_TTL_HRS', 7 * 24)), + cookie_name=os.getenv("JWT_COOKIENAME", JWT_COOKIE_NAME), + session_ttl_in_hours=float(os.getenv("SESSION_TTL_HRS", 7 * 24)), ) TEMPLATE_MANAGER = TemplateManager(conf_bucket, template_dir) @ttl_cache(maxsize=2, ttl=10 * 60, timer=time.time) def get_black_list(): - endpoint = os.getenv('BLACKLIST_ENDPOINT', '') + endpoint = os.getenv("BLACKLIST_ENDPOINT", "") if endpoint: - response = urllib.request.urlopen(endpoint).read().decode('utf-8') - return json.loads(response)['blacklist'] + response = urllib.request.urlopen(endpoint).read().decode() + return json.loads(response)["blacklist"] return {} -app = Chalice(app_name='egress-lambda') +app = Chalice(app_name="egress-lambda") -@app.middleware('http') +@app.middleware("http") def initialize(event, get_response): JWT_MANAGER.black_list = get_black_list() - jwt_keys = retrieve_secret(os.getenv('JWT_KEY_SECRET_NAME')) - JWT_MANAGER.public_key = base64.b64decode(jwt_keys.get('rsa_pub_key', '')).decode() - JWT_MANAGER.private_key = base64.b64decode(jwt_keys.get('rsa_priv_key', '')).decode() + jwt_keys = retrieve_secret(os.getenv("JWT_KEY_SECRET_NAME")) + JWT_MANAGER.public_key = base64.b64decode(jwt_keys.get("rsa_pub_key", "")).decode() + JWT_MANAGER.private_key = base64.b64decode(jwt_keys.get("rsa_priv_key", "")).decode() return get_response(event) -@app.middleware('http') +@app.middleware("http") def set_log_context(event: chalice.app.Request, get_response): - origin_request_id = event.headers.get('x-origin-request-id') + origin_request_id = event.headers.get("x-origin-request-id") log_context( route=event.path, @@ -149,16 +149,16 @@ def set_log_context(event: chalice.app.Request, get_response): log_context(user_id=None, route=None, request_id=None) -@app.middleware('http') +@app.middleware("http") def forward_origin_request_id(event: chalice.app.Request, get_response): response = get_response(event) - origin_request_id = event.headers.get('x-origin-request-id') + origin_request_id = event.headers.get("x-origin-request-id") - response.headers['x-request-id'] = event.lambda_context.aws_request_id + response.headers["x-request-id"] = event.lambda_context.aws_request_id if origin_request_id: # If we were passed in an x-origin-request-id header, pass it out too - response.headers['x-origin-request-id'] = origin_request_id + response.headers["x-origin-request-id"] = origin_request_id return response @@ -294,7 +294,7 @@ def get_aux_request_headers(): @with_trace() def check_for_browser(hdrs): - return 'user-agent' in hdrs and hdrs['user-agent'].lower().startswith('mozilla') + return "user-agent" in hdrs and hdrs["user-agent"].lower().startswith("mozilla") @with_trace() @@ -310,24 +310,26 @@ def get_user_from_token(token): urs_creds = get_urs_creds() params = { - 'client_id': urs_creds['UrsId'], + "client_id": urs_creds["UrsId"], # The client_id of the non SSO application you registered with Earthdata Login - 'token': token + "token": token } - url = '{}/oauth/tokens/user?{}'.format(os.getenv('AUTH_BASE_URL', 'https://urs.earthdata.nasa.gov'), - urlencode(params)) + url = "{}/oauth/tokens/user?{}".format( + os.getenv("AUTH_BASE_URL", "https://urs.earthdata.nasa.gov"), + urlencode(params) + ) - authval = "Basic {}".format(urs_creds['UrsAuth']) - headers = {'Authorization': authval} + authval = f"Basic {urs_creds['UrsAuth']}" + headers = {"Authorization": authval} # Tack on auxillary headers headers.update(get_aux_request_headers()) - log.debug(f'headers: {headers}, params: {params}') + log.debug("headers: %s, params: %s", headers, params) _time = time.time() - req = request.Request(url, headers=headers, method='POST') + req = request.Request(url, headers=headers, method="POST") try: response = request.urlopen(req) except HTTPError as e: @@ -340,45 +342,59 @@ def get_user_from_token(token): try: msg = json.loads(payload) except json.JSONDecodeError: - log.error(f'could not get json message from payload: {payload}') + log.error("could not get json message from payload: %s", payload) msg = {} - log.debug(f'raw payload: {payload}') - log.debug(f'json loads: {msg}') - log.debug(f'code: {response.code}') + log.debug("raw payload: %s", payload) + log.debug("json loads: %s", msg) + log.debug("code: %s", response.code) if response.code == 200: try: - return msg['uid'] + return msg["uid"] except KeyError as e: log.error( - f'Problem with return from URS: e: {e}, url: {url}, params: {params}, response payload: {payload}, ') + "Problem with return from URS: e: %s, url: %s, params: %s, response payload: %s", + e, + url, + params, + payload, + ) return None elif response.code == 403: - if 'error_description' in msg and 'eula' in msg['error_description'].lower(): + if "error_description" in msg and "eula" in msg["error_description"].lower(): # sample json in this case: # `{"status_code": 403, "error_description": "EULA Acceptance Failure", # "resolution_url": "http://uat.urs.earthdata.nasa.gov/approve_app?client_id=LqWhtVpLmwaD4VqHeoN7ww"}` - log.warning('user needs to sign the EULA') + log.warning("user needs to sign the EULA") raise EulaException(msg) # Probably an expired token if here - log.warning(f'403 error from URS: {msg}') + log.warning("403 error from URS: %s", msg) else: - if 'error' in msg: + if "error" in msg: errtxt = msg["error"] else: - errtxt = '' - if 'error_description' in msg: - errtxt = errtxt + ' ' + msg['error_description'] - - log.error(f'Error getting URS userid from token: {errtxt} with code {response.code}') - log.debug(f'url: {url}, params: {params}, ') + errtxt = "" + if "error_description" in msg: + errtxt = errtxt + " " + msg["error_description"] + + log.error( + "Error getting URS userid from token: %s with code %s", + errtxt, + response.code, + ) + log.debug("url: %s, params: %s", url, params) return None @with_trace() def cumulus_log_message(outcome: str, code: int, http_method: str, k_v: dict): - k_v.update({'code': code, 'http_method': http_method, 'status': outcome, 'requestid': get_request_id()}) + k_v.update({ + "code": code, + "http_method": http_method, + "status": outcome, + "requestid": get_request_id(), + }) print(json.dumps(k_v)) @@ -386,15 +402,19 @@ def cumulus_log_message(outcome: str, code: int, http_method: str, k_v: dict): def restore_bucket_vars(): global b_map # pylint: disable=global-statement - log.debug('conf bucket: %s, bucket_map_file: %s', conf_bucket, bucket_map_file) + log.debug("conf bucket: %s, bucket_map_file: %s", conf_bucket, bucket_map_file) if b_map is None: - log.info('downloading various bucket configs from %s: bucketmapfile: %s, ', conf_bucket, bucket_map_file) + log.info( + "downloading various bucket configs from %s: bucketmapfile: %s", + conf_bucket, + bucket_map_file, + ) b_map_dict = get_yaml_file(conf_bucket, bucket_map_file) - reverse = os.getenv('USE_REVERSE_BUCKET_MAP', 'FALSE').lower() == 'true' - iam_compatible = os.getenv('ENABLE_S3_CREDENTIALS_ENDPOINT', 'FALSE').lower() == 'true' + reverse = os.getenv("USE_REVERSE_BUCKET_MAP", "FALSE").lower() == "true" + iam_compatible = os.getenv("ENABLE_S3_CREDENTIALS_ENDPOINT", "FALSE").lower() == "true" - log.debug('bucket map: %s', b_map_dict) + log.debug("bucket map: %s", b_map_dict) try: b_map = BucketMap( b_map_dict, @@ -403,34 +423,34 @@ def restore_bucket_vars(): iam_compatible=iam_compatible ) except ValueError: - log.error('Invalid bucket map, please consult the TEA documentation') + log.error("Invalid bucket map, please consult the TEA documentation") if iam_compatible: log.info( - 'Your bucket map permissions are configured in such a way ' - 'that they cannot be converted to an IAM policy. Either ' - 'fix your bucket map, or disable S3 credentials.' + "Your bucket map permissions are configured in such a way " + "that they cannot be converted to an IAM policy. Either " + "fix your bucket map, or disable S3 credentials." ) raise else: - log.info('reusing old bucket configs') + log.info("reusing old bucket configs") @with_trace() def do_auth_and_return(ctxt): - log.debug('context: {}'.format(ctxt)) - here = ctxt['path'] - if os.getenv('DOMAIN_NAME'): + log.debug("context: %s", ctxt) + here = ctxt["path"] + if os.getenv("DOMAIN_NAME"): # Pop STAGE value off the request if we have a custom domain # TODO(reweeden): python3.9 use `str.removeprefix` - prefix = f'/{STAGE}' + prefix = f"/{STAGE}" if here.startswith(prefix): here = here[len(prefix):] - log.info("here will be {0}".format(here)) + log.info("here will be %s", here) redirect_here = quote_plus(here) urs_url = get_urs_url(ctxt, redirect_here) - log.info("Redirecting for auth: {0}".format(urs_url)) - return Response(body='', status_code=302, headers={'Location': urs_url}) + log.info("Redirecting for auth: %s", urs_url) + return Response(body="", status_code=302, headers={"Location": urs_url}) @with_trace() @@ -438,33 +458,42 @@ def add_cors_headers(headers): assert app.current_request is not None # send CORS headers if we're configured to use them - origin_header = app.current_request.headers.get('origin') + origin_header = app.current_request.headers.get("origin") if origin_header is not None: cors_origin = os.getenv("CORS_ORIGIN") - if cors_origin and (origin_header.endswith(cors_origin) or origin_header.lower() == 'null'): - headers['Access-Control-Allow-Origin'] = origin_header - headers['Access-Control-Allow-Credentials'] = 'true' + if cors_origin and (origin_header.endswith(cors_origin) or origin_header.lower() == "null"): + headers["Access-Control-Allow-Origin"] = origin_header + headers["Access-Control-Allow-Credentials"] = "true" else: - log.warning(f'Origin {origin_header} is not an approved CORS host: {cors_origin}') + log.warning( + "Origin %s is not an approved CORS host: %s", + origin_header, + cors_origin, + ) @with_trace() def make_redirect(to_url, headers=None, status_code=301): if headers is None: headers = {} - headers['Location'] = to_url + headers["Location"] = to_url add_cors_headers(headers) - log.info(f'Redirect created. to_url: {to_url}') - cumulus_log_message('success', status_code, 'GET', {'redirect': 'yes', 'redirect_URL': to_url}) - log.debug(f'headers for redirect: {headers}') - return Response(body='', headers=headers, status_code=status_code) + log.info("Redirect created. to_url: %s", to_url) + cumulus_log_message( + "success", + status_code, + "GET", + {"redirect": "yes", "redirect_URL": to_url}, + ) + log.debug("headers for redirect: %s", headers) + return Response(body="", headers=headers, status_code=status_code) @with_trace() -def make_html_response(t_vars: dict, headers: dict, status_code: int = 200, template_file: str = 'root.html'): +def make_html_response(t_vars: dict, headers: dict, status_code: int = 200, template_file: str = "root.html"): template_vars = { - 'STAGE': STAGE if not os.getenv('DOMAIN_NAME') else None, - 'status_code': status_code, + "STAGE": STAGE if not os.getenv("DOMAIN_NAME") else None, + "status_code": status_code, **t_vars } @@ -473,22 +502,24 @@ def make_html_response(t_vars: dict, headers: dict, status_code: int = 200, temp status_code=status_code, headers={ **headers, - 'Content-Type': 'text/html' + "Content-Type": "text/html" } ) @with_trace() def get_bcconfig(user_id: str) -> dict: - bcconfig = {"user_agent": "Thin Egress App for userid={0}".format(user_id), - "s3": {"addressing_style": "path"}, - "connect_timeout": 600, - "read_timeout": 600, - "retries": {"max_attempts": 10}} + bcconfig = { + "user_agent": f"Thin Egress App for userid={user_id}", + "s3": {"addressing_style": "path"}, + "connect_timeout": 600, + "read_timeout": 600, + "retries": {"max_attempts": 10}, + } - signature_version = os.getenv('S3_SIGNATURE_VERSION') + signature_version = os.getenv("S3_SIGNATURE_VERSION") if signature_version: - bcconfig['signature_version'] = signature_version + bcconfig["signature_version"] = signature_version return bcconfig @@ -502,18 +533,20 @@ def get_bcconfig(user_id: str) -> dict: def get_bucket_region(session, bucketname) -> str: try: _time = time.time() - bucket_region = session.client('s3').get_bucket_location(Bucket=bucketname)['LocationConstraint'] or 'us-east-1' + bucket_region = session.client("s3").get_bucket_location( + Bucket=bucketname, + )["LocationConstraint"] or "us-east-1" log.info(return_timing_object( service="s3", endpoint=f"client().get_bucket_location({bucketname})", duration=duration(_time) )) - log.debug("bucket {0} is in region {1}".format(bucketname, bucket_region)) + log.debug("bucket %s is in region %s", bucketname, bucket_region) return bucket_region except ClientError as e: # We hit here if the download role cannot access a bucket, or if it doesn't exist - log.error("Could not access download bucket {0}: {1}".format(bucketname, e)) + log.error("Could not access download bucket %s: %s", bucketname, e) raise @@ -521,14 +554,14 @@ def get_bucket_region(session, bucketname) -> str: def get_user_ip(): assert app.current_request is not None - x_forwarded_for = app.current_request.headers.get('x-forwarded-for') + x_forwarded_for = app.current_request.headers.get("x-forwarded-for") if x_forwarded_for: - ip = x_forwarded_for.replace(' ', '').split(',')[0] - log.debug(f"x-fowarded-for: {x_forwarded_for}") - log.info(f"Assuming {ip} is the users IP") + ip = x_forwarded_for.replace(" ", "").split(",")[0] + log.debug("x-fowarded-for: %s", x_forwarded_for) + log.info("Assuming %s is the users IP", ip) return ip - ip = app.current_request.context['identity']['sourceIp'] - log.debug(f"NO x_fowarded_for, using sourceIp: {ip} instead") + ip = app.current_request.context["identity"]["sourceIp"] + log.debug("NO x_fowarded_for, using sourceIp: %s instead", ip) return ip @@ -539,7 +572,7 @@ def try_download_from_bucket(bucket, filename, user_profile, headers: dict): user_id = None if user_profile is not None: user_id = user_profile.user_id - log.info("User Id for download is {0}".format(user_id)) + log.info("User Id for download is %s", user_id) log_context(user_id=user_id) timer.mark("check_in_region_request()") @@ -555,31 +588,47 @@ def try_download_from_bucket(bucket, filename, user_profile, headers: dict): timer.mark() except ClientError as e: try: - code = e.response['ResponseMetadata']['HTTPStatusCode'] + code = e.response["ResponseMetadata"]["HTTPStatusCode"] except (AttributeError, KeyError, IndexError): code = 400 - log.debug(f'response: {e.response}') - log.error(f'ClientError while {user_id} tried downloading {bucket}/{filename}: {e}') - cumulus_log_message('failure', code, 'GET', {'reason': 'ClientError', 's3': f'{bucket}/{filename}'}) - template_vars = {'contentstring': 'There was a problem accessing download data.', - 'title': 'Data Not Available', - 'requestid': get_request_id(), - } + log.debug("response: %s", e.response) + log.error( + "ClientError while %s tried downloading %s/%s: %s", + user_id, + bucket, + filename, + e, + ) + cumulus_log_message( + "failure", + code, + "GET", + {"reason": "ClientError", "s3": f"{bucket}/{filename}"}, + ) + template_vars = { + "contentstring": "There was a problem accessing download data.", + "title": "Data Not Available", + "requestid": get_request_id(), + } headers = {} - return make_html_response(template_vars, headers, code, 'error.html') - - log.debug('this region: {}'.format(os.getenv('AWS_DEFAULT_REGION', 'env var doesnt exist'))) - if bucket_region != os.getenv('AWS_DEFAULT_REGION'): - log_message = "bucket {0} is in region {1}, we are in region {2}! " + \ - "This is double egress in Proxy mode!" - log.warning(log_message.format(bucket, bucket_region, os.getenv('AWS_DEFAULT_REGION'))) + return make_html_response(template_vars, headers, code, "error.html") + + log.debug("this region: %s", os.getenv("AWS_DEFAULT_REGION")) + if bucket_region != os.getenv("AWS_DEFAULT_REGION"): + log.warning( + "bucket %s is in region %s, we are in region %s! " + "This is double egress in Proxy mode!", + bucket, + bucket_region, + os.getenv("AWS_DEFAULT_REGION"), + ) client = get_bc_config_client(user_id) - log.debug('timing for try_download_from_bucket(): ') + log.debug("timing for try_download_from_bucket(): ") timer.log_all(log) - log.info("Attempting to download s3://{0}/{1}".format(bucket, filename)) + log.info("Attempting to download s3://%s/%s", bucket, filename) # We'll cache the size later. head_check = {} @@ -593,18 +642,18 @@ def try_download_from_bucket(bucket, filename, user_profile, headers: dict): head_check = client.head_object(Bucket=bucket, Key=filename, Range=(range_header or "")) log.info(return_timing_object(service="s3", endpoint="client.head_object()", duration=duration(_time))) - redirheaders = {'Range': range_header} if range_header else {} + redirheaders = {"Range": range_header} if range_header else {} expires_in = 3600 - offset - redirheaders['Cache-Control'] = 'private, max-age={0}'.format(expires_in - 60) + redirheaders["Cache-Control"] = f"private, max-age={expires_in - 60}" if isinstance(headers, dict): - log.debug(f'adding {headers} to redirheaders {redirheaders}') + log.debug("adding %s to redirheaders %s", headers, redirheaders) redirheaders.update(headers) # Generate URL presigned_url = get_presigned_url(creds, bucket, filename, bucket_region, expires_in, user_id) s3_host = urlparse(presigned_url).netloc - log.debug("Presigned URL host was {0}".format(s3_host)) + log.debug("Presigned URL host was %s", s3_host) download_stat = {"bucket": bucket, "object": filename, "range": range_header} download_stat.update({"InRegion": "True" if is_in_region else "False"}) @@ -612,29 +661,51 @@ def try_download_from_bucket(bucket, filename, user_profile, headers: dict): if size is not None: download_stat.update({"size": size}) - log.info({"download": download_stat}) + log.info("%s", {"download": download_stat}) return make_redirect(presigned_url, redirheaders, 303) except ClientError as e: # Watch for bad range request: - if e.response['ResponseMetadata']['HTTPStatusCode'] == 416: + if e.response["ResponseMetadata"]["HTTPStatusCode"] == 416: # cumulus uses this log message for metrics purposes. - log.error(f"Invalid Range 416, Could not get range {get_range_header_val()} s3://{bucket}/{filename}: {e}") - cumulus_log_message('failure', 416, 'GET', {'reason': 'Invalid Range', - 's3': f'{bucket}/{filename}', - 'range': get_range_header_val()}) - return Response(body='Invalid Range', status_code=416, headers={}) + log.error( + "Invalid Range 416, Could not get range %s s3://%s/%s: %s", + get_range_header_val(), + bucket, + filename, + e, + ) + cumulus_log_message( + "failure", + 416, + "GET", + { + "reason": "Invalid Range", + "s3": f"{bucket}/{filename}", + "range": get_range_header_val(), + }, + ) + return Response(body="Invalid Range", status_code=416, headers={}) # cumulus uses this log message for metrics purposes. - log.warning("Could not download s3://{0}/{1}: {2}".format(bucket, filename, e)) - template_vars = {'contentstring': 'Could not find requested data.', - 'title': 'Data Not Available', - 'requestid': get_request_id(), } + log.warning("Could not download s3://%s/%s: %s", bucket, filename, e) + template_vars = { + "contentstring": "Could not find requested data.", + "title": "Data Not Available", + "requestid": get_request_id(), + } headers = {} - cumulus_log_message('failure', 404, 'GET', - {'reason': 'Could not find requested data', 's3': f'{bucket}/{filename}'}) - return make_html_response(template_vars, headers, 404, 'error.html') + cumulus_log_message( + "failure", + 404, + "GET", + { + "reason": "Could not find requested data", + "s3": f"{bucket}/{filename}", + }, + ) + return make_html_response(template_vars, headers, 404, "error.html") @with_trace() @@ -642,41 +713,41 @@ def get_jwt_field(cookievar: dict, fieldname: str): return cookievar.get(JWT_COOKIE_NAME, {}).get(fieldname, None) -@app.route('/') +@app.route("/") @with_trace(context={}) def root(): - template_vars = {'title': 'Welcome'} + template_vars = {"title": "Welcome"} user_profile = JWT_MANAGER.get_profile_from_headers(app.current_request.headers) if user_profile is not None: log_context(user_id=user_profile.user_id) - if os.getenv('MATURITY') == 'DEV': - template_vars['profile'] = user_profile.to_jwt_payload() + if os.getenv("MATURITY") == "DEV": + template_vars["profile"] = user_profile.to_jwt_payload() else: - template_vars['URS_URL'] = get_urs_url(app.current_request.context) - headers = {'Content-Type': 'text/html'} - return make_html_response(template_vars, headers, 200, 'root.html') + template_vars["URS_URL"] = get_urs_url(app.current_request.context) + headers = {"Content-Type": "text/html"} + return make_html_response(template_vars, headers, 200, "root.html") -@app.route('/logout') +@app.route("/logout") @with_trace(context={}) def logout(): user_profile = JWT_MANAGER.get_profile_from_headers(app.current_request.headers) - template_vars = {'title': 'Logged Out', 'URS_URL': get_urs_url(app.current_request.context)} + template_vars = {"title": "Logged Out", "URS_URL": get_urs_url(app.current_request.context)} if user_profile is not None: - template_vars['contentstring'] = 'You are logged out.' + template_vars["contentstring"] = "You are logged out." else: - template_vars['contentstring'] = 'No active login found.' + template_vars["contentstring"] = "No active login found." headers = { - 'Content-Type': 'text/html', + "Content-Type": "text/html", } - headers.update(JWT_MANAGER.get_header_to_set_auth_cookie(None, os.getenv('COOKIE_DOMAIN', ''))) - return make_html_response(template_vars, headers, 200, 'root.html') + headers.update(JWT_MANAGER.get_header_to_set_auth_cookie(None, os.getenv("COOKIE_DOMAIN", ""))) + return make_html_response(template_vars, headers, 200, "root.html") -@app.route('/login') +@app.route("/login") @with_trace(context={}) def login(): try: @@ -686,77 +757,83 @@ def login(): app.current_request.query_params, app.current_request.context, JWT_MANAGER, - os.getenv('COOKIE_DOMAIN', ''), + os.getenv("COOKIE_DOMAIN", ""), aux_headers=aux_headers ) except ClientError as e: log.error("%s", e) status_code = 500 template_vars = { - 'contentstring': 'Client Error occurred. ', - 'title': 'Client Error', + "contentstring": "Client Error occurred. ", + "title": "Client Error", } if status_code == 301: - return Response(body='', status_code=status_code, headers=headers) + return Response(body="", status_code=status_code, headers=headers) - template_vars['requestid'] = get_request_id() - return make_html_response(template_vars, headers, status_code, 'error.html') + template_vars["requestid"] = get_request_id() + return make_html_response(template_vars, headers, status_code, "error.html") -@app.route('/version') +@app.route("/version") @with_trace(context={}) def version(): log.info("Got a version request!") - version_return = {'version_id': ''} + version_return = {"version_id": ""} # If we've flushed, lets return the flush time. - if os.getenv('BUMP'): - version_return['last_flush'] = os.getenv('BUMP') + if os.getenv("BUMP"): + version_return["last_flush"] = os.getenv("BUMP") return json.dumps(version_return) -@app.route('/locate') +@app.route("/locate") @with_trace(context={}) def locate(): timer = Timer() - timer.mark('restore_bucket_vars()') + timer.mark("restore_bucket_vars()") query_params = app.current_request.query_params - if query_params is None or query_params.get('bucket_name') is None: - return Response(body='Required "bucket_name" query paramater not specified', - status_code=400, - headers={'Content-Type': 'text/plain'}) + if query_params is None or query_params.get("bucket_name") is None: + return Response( + body='Required "bucket_name" query paramater not specified', + status_code=400, + headers={"Content-Type": "text/plain"}, + ) restore_bucket_vars() timer.mark() - bucket_name = query_params.get('bucket_name') + bucket_name = query_params.get("bucket_name") matching_paths = [ entry.bucket_path for entry in b_map.entries() if entry.bucket == bucket_name ] - log.debug('matching_paths: %s', matching_paths) + log.debug("matching_paths: %s", matching_paths) - log.debug('timing for locate(): ') + log.debug("timing for locate(): ") timer.log_all(log) if matching_paths: - return Response(body=json.dumps(matching_paths), - status_code=200, - headers={'Content-Type': 'application/json'}) - return Response(body=f'No route defined for {bucket_name}', - status_code=404, - headers={'Content-Type': 'text/plain'}) + return Response( + body=json.dumps(matching_paths), + status_code=200, + headers={"Content-Type": "application/json"}, + ) + return Response( + body=f"No route defined for {bucket_name}", + status_code=404, + headers={"Content-Type": "text/plain"}, + ) @with_trace() def get_range_header_val(): - if 'Range' in app.current_request.headers: - return app.current_request.headers['Range'] - if 'range' in app.current_request.headers: - return app.current_request.headers['range'] + if "Range" in app.current_request.headers: + return app.current_request.headers["Range"] + if "range" in app.current_request.headers: + return app.current_request.headers["range"] return None @@ -767,7 +844,7 @@ def get_new_session_client(user_id): session = get_role_session(user_id=user_id) _time = time.time() - new_bc_client = session.client('s3', **params) + new_bc_client = session.client("s3", **params) log.info(return_timing_object(service="s3", endpoint="session.client()", duration=duration(_time))) return new_bc_client @@ -782,7 +859,7 @@ def get_bc_config_client(user_id): @with_trace() def get_data_dl_s3_client(): user_profile = JWT_MANAGER.get_profile_from_headers(app.current_request.headers) - user_id = '' + user_id = "" if user_profile is not None: user_id = user_profile.user_id return get_bc_config_client(user_id) @@ -803,25 +880,34 @@ def try_download_head(bucket, filename): client.get_object(Bucket=bucket, Key=filename) else: # TODO: Should both `client.get_object()` be `client.head_object()` ?!?!?! - log.info("Downloading range {0}".format(range_header)) + log.info("Downloading range %s", range_header) client.get_object(Bucket=bucket, Key=filename, Range=range_header) log.info(return_timing_object(service="s3", endpoint="client.get_object()", duration=duration(_time))) timer.mark() except ClientError as e: - log.warning("Could not get head for s3://{0}/{1}: {2}".format(bucket, filename, e)) + log.warning("Could not get head for s3://%s/%s: %s", bucket, filename, e) # cumulus uses this log message for metrics purposes. - template_vars = {'contentstring': 'File not found', - 'title': 'File not found', - 'requestid': get_request_id(), } + template_vars = { + "contentstring": "File not found", + "title": "File not found", + "requestid": get_request_id(), + } headers = {} - cumulus_log_message('failure', 404, 'HEAD', - {'reason': 'Could not find requested data', 's3': f'{bucket}/{filename}'}) - return make_html_response(template_vars, headers, 404, 'error.html') + cumulus_log_message( + "failure", + 404, + "HEAD", + { + "reason": "Could not find requested data", + "s3": f"{bucket}/{filename}", + }, + ) + return make_html_response(template_vars, headers, 404, "error.html") # Try Redirecting to HEAD. There should be a better way. user_profile = JWT_MANAGER.get_profile_from_headers(app.current_request.headers) - user_id = '' + user_id = "" if user_profile is not None: user_id = user_profile.user_id @@ -834,34 +920,42 @@ def try_download_head(bucket, filename): timer.mark("get_bucket_region()") bucket_region = get_bucket_region(session, bucket) timer.mark("get_presigned_url()") - presigned_url = get_presigned_url(creds, bucket, filename, bucket_region, url_lifespan, user_id, - 'HEAD') + presigned_url = get_presigned_url( + creds, + bucket, + filename, + bucket_region, + url_lifespan, + user_id, + "HEAD", + ) timer.mark() s3_host = urlparse(presigned_url).netloc # Return a redirect to a HEAD - log.debug("Presigned HEAD URL host was {0}".format(s3_host)) + log.debug("Presigned HEAD URL host was %s", s3_host) - log.debug('timing for try_download_head()') + log.debug("timing for try_download_head()") timer.log_all(log) return make_redirect(presigned_url, {}, 303) # Attempt to validate HEAD request -@app.route('/{proxy+}', methods=['HEAD']) +@app.route("/{proxy+}", methods=["HEAD"]) @with_trace(context={}) def dynamic_url_head(): timer = Timer() timer.mark("restore_bucket_vars()") - log.debug('attempting to HEAD a thing') + + param = app.current_request.uri_params.get("proxy") + log.debug("attempting to HEAD: %s", param) restore_bucket_vars() timer.mark("b_map.get()") - param = app.current_request.uri_params.get('proxy') if param is None: - return Response(body='HEAD failed', headers={}, status_code=400) + return Response(body="HEAD failed", headers={}, status_code=400) entry = b_map.get(param) timer.mark() @@ -870,29 +964,29 @@ def dynamic_url_head(): if entry is None: template_vars = { - 'contentstring': 'Bucket not available', - 'title': 'Bucket not available', - 'requestid': get_request_id() + "contentstring": "Bucket not available", + "title": "Bucket not available", + "requestid": get_request_id() } headers = {} - return make_html_response(template_vars, headers, 404, 'error.html') + return make_html_response(template_vars, headers, 404, "error.html") timer.mark() - log.debug('timing for dynamic_url_head()') + log.debug("timing for dynamic_url_head()") timer.log_all(log) return try_download_head(entry.bucket, entry.object_key) -@app.route('/{proxy+}', methods=['GET']) +@app.route("/{proxy+}", methods=["GET"]) @with_trace(context={}) def dynamic_url(): timer = Timer() timer.mark("restore_bucket_vars()") param = app.current_request.uri_params.get("proxy") - log.debug('attempting to GET: %s', param) + log.debug("attempting to GET: %s", param) restore_bucket_vars() - log.debug(f'b_map: {b_map.bucket_map}') + log.debug("b_map: %s", b_map.bucket_map) timer.mark() log.info(app.current_request.headers) @@ -901,33 +995,36 @@ def dynamic_url(): if param is not None: entry = b_map.get(param) - log.debug('entry: %s', entry) + log.debug("entry: %s", entry) if not entry: template_vars = { - 'contentstring': 'File not found', - 'title': 'File not found', - 'requestid': get_request_id(), + "contentstring": "File not found", + "title": "File not found", + "requestid": get_request_id(), } headers = {} - return make_html_response(template_vars, headers, 404, 'error.html') + return make_html_response(template_vars, headers, 404, "error.html") if not entry.object_key: - log.warning('Request was made to directory listing instead of object: %s', entry.bucket_path) + log.warning( + "Request was made to directory listing instead of object: %s", + entry.bucket_path, + ) template_vars = { - 'contentstring': 'Request does not appear to be valid.', - 'title': 'Request Not Serviceable', - 'requestid': get_request_id() + "contentstring": "Request does not appear to be valid.", + "title": "Request Not Serviceable", + "requestid": get_request_id() } headers = {} - return make_html_response(template_vars, headers, 404, 'error.html') + return make_html_response(template_vars, headers, 404, "error.html") custom_headers = dict(entry.headers) timer.mark("get_required_groups()") # It's only necessary to be in one of these groups required_groups = entry.get_required_groups() - log.debug('required_groups: %s', required_groups) + log.debug("required_groups: %s", required_groups) # Check for public bucket timer.mark("possible auth header handling") user_profile = JWT_MANAGER.get_profile_from_headers(app.current_request.headers) @@ -948,24 +1045,24 @@ def dynamic_url(): new_jwt_cookie_headers = {} if new_user_profile: - log.debug(f"We got new profile from user_in_group() {new_user_profile}") + log.debug("We got new profile from user_in_group() %s", new_user_profile) user_profile = new_user_profile new_jwt_cookie_headers.update( - JWT_MANAGER.get_header_to_set_auth_cookie(user_profile, os.getenv('COOKIE_DOMAIN', ''))) + JWT_MANAGER.get_header_to_set_auth_cookie(user_profile, os.getenv("COOKIE_DOMAIN", ""))) - log.debug('user_in_group: {}'.format(u_in_g)) + log.debug("user_in_group: %s", u_in_g) # Check that the bucket is either NOT private, or user belongs to that group if required_groups and not u_in_g: template_vars = { - 'contentstring': 'This data is not currently available.', - 'title': 'Could not access data', - 'requestid': get_request_id() + "contentstring": "This data is not currently available.", + "title": "Could not access data", + "requestid": get_request_id() } - return make_html_response(template_vars, new_jwt_cookie_headers, 403, 'error.html') + return make_html_response(template_vars, new_jwt_cookie_headers, 403, "error.html") custom_headers.update(new_jwt_cookie_headers) - log.debug(f'custom headers before try download from bucket: {custom_headers}') + log.debug("custom headers before try download from bucket: %s", custom_headers) timer.mark() log.debug("timing for dynamic_url()") @@ -974,7 +1071,7 @@ def dynamic_url(): return try_download_from_bucket(entry.bucket, entry.object_key, user_profile, custom_headers) -@app.route('/s3credentials', methods=['GET']) +@app.route("/s3credentials", methods=["GET"]) @with_trace(context={}) def s3credentials(): timer = Timer() @@ -1065,26 +1162,31 @@ def get_s3_credentials(user_id: str, role_session_name: str, policy: dict): return response["Credentials"] -@app.route('/s3credentialsREADME', methods=['GET']) +@app.route("/s3credentialsREADME", methods=["GET"]) @with_trace(context={}) def s3credentials_readme(): return make_html_response({}, {}, 200, "s3credentials_readme.html") -@app.route('/profile') +@app.route("/profile") @with_trace(context={}) def profile(): - return Response(body='Profile not available.', - status_code=200, headers={'Content-Type': 'text/plain'}) + return Response( + body="Profile not available.", + status_code=200, + headers={"Content-Type": "text/plain"}, + ) -@app.route('/pubkey', methods=['GET']) +@app.route("/pubkey", methods=["GET"]) @with_trace(context={}) def pubkey(): - thebody = json.dumps({ - 'rsa_pub_key': JWT_MANAGER.public_key, - 'algorithm': JWT_MANAGER.algorithm + body = json.dumps({ + "rsa_pub_key": JWT_MANAGER.public_key, + "algorithm": JWT_MANAGER.algorithm, }) - return Response(body=thebody, - status_code=200, - headers={'content-type': 'application/json'}) + return Response( + body=body, + status_code=200, + headers={"content-type": "application/json"}, + ) diff --git a/thin_egress_app/tea_bumper.py b/thin_egress_app/tea_bumper.py index 3044538e..797f2f92 100644 --- a/thin_egress_app/tea_bumper.py +++ b/thin_egress_app/tea_bumper.py @@ -6,23 +6,25 @@ log = get_log() -TEA_LAMBDA_NAME = os.getenv('TEA_LAMBDA_NAME') +TEA_LAMBDA_NAME = os.getenv("TEA_LAMBDA_NAME") def lambda_handler(event, context): del event - log.info('teabumper!') + log.info("teabumper!") - client = boto3.client('lambda') + client = boto3.client("lambda") egress_env = client.get_function_configuration( FunctionName=TEA_LAMBDA_NAME, - ).get('Environment') or {'Variables': {}} + ).get("Environment") or {"Variables": {}} - egress_env['Variables'].update({'BUMP': f'{str(datetime.utcnow())}, {context.aws_request_id}'}) + egress_env["Variables"].update({ + "BUMP": f"{str(datetime.utcnow())}, {context.aws_request_id}" + }) - log.debug(f"envvar for {TEA_LAMBDA_NAME}: {egress_env['Variables']}") + log.debug("envvar for %s: %s", TEA_LAMBDA_NAME, egress_env["Variables"]) response = client.update_function_configuration( FunctionName=TEA_LAMBDA_NAME, Environment=egress_env diff --git a/thin_egress_app/update_lambda.py b/thin_egress_app/update_lambda.py index f5e5305b..a79ee4e8 100644 --- a/thin_egress_app/update_lambda.py +++ b/thin_egress_app/update_lambda.py @@ -12,7 +12,7 @@ def lambda_handler(event, context): # Get current region session = boto3.session.Session() current_region = session.region_name - client = boto3.client('iam') + client = boto3.client("iam") print(f"Current region in {current_region}") cidr_list = get_region_cidrs(current_region) @@ -22,50 +22,62 @@ def lambda_handler(event, context): new_policy["Statement"][0]["Condition"] = {"IpAddress": {"aws:SourceIp": cidr_list}} # Clear out any pre-existing roles: - RoleName = os.getenv('iam_role_name') + RoleName = os.getenv("iam_role_name") response = client.list_role_policies(RoleName=RoleName) - for PolicyName in response.get('PolicyNames', ()): + for PolicyName in response.get("PolicyNames", ()): print(f"Removing old Policy {PolicyName} from Role {RoleName}") response = client.delete_role_policy(RoleName=RoleName, PolicyName=PolicyName) # Put the new policy - response = client.put_role_policy(RoleName=RoleName, PolicyName=os.getenv('policy_name'), - PolicyDocument=json.dumps(new_policy)) + response = client.put_role_policy( + RoleName=RoleName, + PolicyName=os.getenv("policy_name"), + PolicyDocument=json.dumps(new_policy), + ) # Check if response is coming from CloudFormation - if 'ResponseURL' in event: - print("Sending success message to callback URL {0}".format(event['ResponseURL'])) - cfnresponse.send(event, context, cfnresponse.SUCCESS, {'Data': "Good"}) + if "ResponseURL" in event: + print(f"Sending success message to callback URL {event['ResponseURL']}") + cfnresponse.send(event, context, cfnresponse.SUCCESS, {"Data": "Good"}) return response except Exception as e: - error_string = "There was a problem updating policy {0} for Role {1} in region {2}: {3}".format( - os.getenv('policy_name'), os.getenv('iam_role_name'), current_region, e) + error_string = ( + f"There was a problem updating policy {os.getenv('policy_name')} " + f"for Role {os.getenv('iam_role_name')} " + f"in region {current_region}: {e}" + ) print(error_string) - if 'ResponseURL' in event: - print("Sending FAILURE message to callback URL {0}".format(event['ResponseURL'])) - cfnresponse.send(event, context, cfnresponse.FAILED, {'Data': error_string}) + if "ResponseURL" in event: + print(f"Sending FAILURE message to callback URL {event['ResponseURL']}") + cfnresponse.send(event, context, cfnresponse.FAILED, {"Data": error_string}) return False def get_region_cidrs(current_region): # Bandit complains with B310 on the line below. We know the URL, this is safe! - output = urllib.request.urlopen('https://ip-ranges.amazonaws.com/ip-ranges.json').read().decode('utf-8') # nosec - ip_ranges = json.loads(output)['prefixes'] - in_region_amazon_ips = [item['ip_prefix'] for item in ip_ranges if - item["service"] == "AMAZON" and item["region"] == current_region] - # It's important to filter down the CIDR range as much as possible. Too large can cause the role creation to fail. + output = urllib.request.urlopen( + "https://ip-ranges.amazonaws.com/ip-ranges.json" + ).read().decode() # nosec + ip_ranges = json.loads(output)["prefixes"] + in_region_amazon_ips = [ + item["ip_prefix"] + for item in ip_ranges + if item["service"] == "AMAZON" and item["region"] == current_region + ] + # It's important to filter down the CIDR range as much as possible. + # Too large can cause the role creation to fail. in_region_amazon_ips = [str(ip) for ip in cidr_merge(in_region_amazon_ips)] # Add in Private IP Space - in_region_amazon_ips.append('10.0.0.0/8') + in_region_amazon_ips.append("10.0.0.0/8") return in_region_amazon_ips def get_base_policy(prefix): - vpcid = os.getenv('vpcid') + vpcid = os.getenv("vpcid") return { "Version": "2012-10-17", "Statement": [