From d47a4da567fad784443b9725314c77de4c1e18d4 Mon Sep 17 00:00:00 2001 From: monkut Date: Thu, 6 Oct 2022 16:27:43 +0900 Subject: [PATCH] Handle spaces in x-forwared-for/remove six (#1127) (#1163) * :sparkles: Handle spaces in x-forwared-for (#1127) * :fire: remove `six` (no longer needed as 2.x is no longer supported) * :white_check_mark: add testcase for SUPPORTED_VERSION check to increase code coverage. * :art: run black/isort * :wrench: rename function for clarity * :fire: remove unnecessary import * :wrench: change name of unused mock instance * :sparkles: (#879) Fix url decoding for query string * :wrench: change docstring type multi-line comment to standard multiline comment :sparkles: handle case where "requestContext" is not provided by the event. > Systems calling the Lambda (other than API Gateway) may not provide the field requestContext in the event. :white_check_mark: add testcases * :fire: remove duplicate comment * :pencil: add docstring to util function, `get_unsupported_sys_versioninfo()` * :fire: remove double-underscore --- Pipfile | 1 - tests/tests.py | 163 +++++++++++++++++++++++++++++++++++++++++++++++++ tests/utils.py | 7 +++ zappa/wsgi.py | 56 +++++++++-------- 4 files changed, 199 insertions(+), 28 deletions(-) diff --git a/Pipfile b/Pipfile index 0b6cf396f..28839f094 100644 --- a/Pipfile +++ b/Pipfile @@ -35,7 +35,6 @@ python-slugify = "*" PyYAML = "*" # previous versions don't work with urllib3 1.24 requests = ">=2.20.0" -six = "*" toml = "*" tqdm = "*" troposphere = ">=3.0" diff --git a/tests/tests.py b/tests/tests.py index 1a6eb830c..589211684 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -57,6 +57,8 @@ ) from zappa.wsgi import common_log, create_wsgi_request +from .utils import get_unsupported_sys_versioninfo + def random_string(length): return "".join(random.choice(string.printable) for _ in range(length)) @@ -761,6 +763,62 @@ def test_wsgi_event(self): request = create_wsgi_request(event) + def test_wsgi_event_handle_space_in_xforwardedfor(self): + event = { + "body": None, + "resource": "/", + "requestContext": { + "resourceId": "6cqjw9qu0b", + "apiId": "9itr2lba55", + "resourcePath": "/", + "httpMethod": "GET", + "requestId": "c17cb1bf-867c-11e6-b938-ed697406e3b5", + "accountId": "724336686645", + "identity": { + "apiKey": None, + "userArn": None, + "cognitoAuthenticationType": None, + "caller": None, + "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:48.0) Gecko/20100101 Firefox/48.0", + "user": None, + "cognitoIdentityPoolId": None, + "cognitoIdentityId": None, + "cognitoAuthenticationProvider": None, + "sourceIp": "50.191.225.98", + "accountId": None, + }, + "stage": "devorr", + }, + "queryStringParameters": None, + "httpMethod": "GET", + "pathParameters": None, + "headers": { + "Via": "1.1 6801928d54163af944bf854db8d5520e.cloudfront.net (CloudFront)", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "CloudFront-Is-SmartTV-Viewer": "false", + "CloudFront-Forwarded-Proto": "https", + "X-Forwarded-For": "50.191.225.98 , 204.246.168.101", + "CloudFront-Viewer-Country": "US", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Upgrade-Insecure-Requests": "1", + "Host": "9itr2lba55.execute-api.us-east-1.amazonaws.com", + "X-Forwarded-Proto": "https", + "X-Amz-Cf-Id": "qgNdqKT0_3RMttu5KjUdnvHI3OKm1BWF8mGD2lX8_rVrJQhhp-MLDw==", + "CloudFront-Is-Tablet-Viewer": "false", + "X-Forwarded-Port": "443", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:48.0) Gecko/20100101 Firefox/48.0", + "CloudFront-Is-Mobile-Viewer": "false", + "CloudFront-Is-Desktop-Viewer": "true", + }, + "stageVariables": None, + "path": "/", + } + expected = "50.191.225.98" + request = create_wsgi_request(event) + actual = request["REMOTE_ADDR"] + self.assertEqual(actual, expected) + def test_wsgi_path_info_unquoted(self): event = { "body": {}, @@ -973,6 +1031,89 @@ def test_wsgi_without_body(self): response_tuple = collections.namedtuple("Response", ["status_code", "content"]) response = response_tuple(200, "hello") + def test_wsgi_without_requestcontext(self): + event = { + "body": None, + "resource": "/", + "queryStringParameters": None, + "pathParameters": None, + "headers": { + "Via": "1.1 38205a04d96d60185e88658d3185ccee.cloudfront.net (CloudFront)", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "CloudFront-Is-SmartTV-Viewer": "false", + "CloudFront-Forwarded-Proto": "https", + "X-Forwarded-For": "71.231.27.57, 104.246.180.51", + "CloudFront-Viewer-Country": "US", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:45.0) Gecko/20100101 Firefox/45.0", + "Host": "xo2z7zafjh.execute-api.us-east-1.amazonaws.com", + "X-Forwarded-Proto": "https", + "Cookie": "zappa=AQ4", + "CloudFront-Is-Tablet-Viewer": "false", + "X-Forwarded-Port": "443", + "Referer": "https://xo8z7zafjh.execute-api.us-east-1.amazonaws.com/former/post", + "CloudFront-Is-Mobile-Viewer": "false", + "X-Amz-Cf-Id": "31zxcUcVyUxBOMk320yh5NOhihn5knqrlYQYpGGyOngKKwJb0J0BAQ==", + "CloudFront-Is-Desktop-Viewer": "true", + }, + "stageVariables": None, + "path": "/", + "isBase64Encoded": True, + } + environ = create_wsgi_request(event, trailing_slash=False) + self.assertTrue(environ) + + def test_wsgi_with_authorizer(self): + expected_remote_user = "remote-user" + authorizer = { + "principalId": expected_remote_user, + } + event = { + "body": None, + "resource": "/", + "requestContext": { + "resourceId": "6cqjw9qu0b", + "apiId": "9itr2lba55", + "resourcePath": "/", + "httpMethod": "POST", + "requestId": "c17cb1bf-867c-11e6-b938-ed697406e3b5", + "accountId": "724336686645", + "authorizer": authorizer, + "stage": "devorr", + }, + "queryStringParameters": None, + "httpMethod": "POST", + "pathParameters": None, + "headers": { + "Via": "1.1 38205a04d96d60185e88658d3185ccee.cloudfront.net (CloudFront)", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "CloudFront-Is-SmartTV-Viewer": "false", + "CloudFront-Forwarded-Proto": "https", + "X-Forwarded-For": "71.231.27.57, 104.246.180.51", + "CloudFront-Viewer-Country": "US", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:45.0) Gecko/20100101 Firefox/45.0", + "Host": "xo2z7zafjh.execute-api.us-east-1.amazonaws.com", + "X-Forwarded-Proto": "https", + "Cookie": "zappa=AQ4", + "CloudFront-Is-Tablet-Viewer": "false", + "X-Forwarded-Port": "443", + "Referer": "https://xo8z7zafjh.execute-api.us-east-1.amazonaws.com/former/post", + "CloudFront-Is-Mobile-Viewer": "false", + "X-Amz-Cf-Id": "31zxcUcVyUxBOMk320yh5NOhihn5knqrlYQYpGGyOngKKwJb0J0BAQ==", + "CloudFront-Is-Desktop-Viewer": "true", + }, + "stageVariables": None, + "path": "/", + "isBase64Encoded": True, + } + + environ = create_wsgi_request(event, trailing_slash=False) + self.assertEqual(environ["REMOTE_USER"], expected_remote_user) + self.assertDictEqual(environ["API_GATEWAY_AUTHORIZER"], authorizer) + def test_wsgi_from_apigateway_testbutton(self): """ API Gateway resources have a "test bolt" button on methods. @@ -2687,6 +2828,28 @@ def test_delete_lambda_concurrency(self, client): FunctionName="abc", ) + @mock.patch("sys.version_info", new_callable=get_unsupported_sys_versioninfo) + def test_unsupported_version_error(self, *_): + from importlib import reload + + with self.assertRaises(RuntimeError): + import zappa + + reload(zappa) + + def test_wsgi_query_string_unquoted(self): + event = { + "body": {}, + "headers": {}, + "pathParameters": {}, + "path": "/path/path1", + "httpMethod": "GET", + "queryStringParameters": {"a": "A,B", "b": "C#D"}, + "requestContext": {}, + } + request = create_wsgi_request(event) + self.assertEqual(request["QUERY_STRING"], "a=A,B&b=C#D") + if __name__ == "__main__": unittest.main() diff --git a/tests/utils.py b/tests/utils.py index 91e588cc4..db48b0db5 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,6 @@ import functools import os +from collections import namedtuple from contextlib import contextmanager import boto3 @@ -72,3 +73,9 @@ def stub_open(*args, **kwargs): with patch("__builtin__.open", stub_open): yield mock_open, mock_file + + +def get_unsupported_sys_versioninfo() -> tuple: + """Mock used to test the python unsupported version testcase""" + invalid_versioninfo = namedtuple("version_info", ["major", "minor", "micro", "releaselevel", "serial"]) + return invalid_versioninfo(3, 6, 1, "final", 0) diff --git a/zappa/wsgi.py b/zappa/wsgi.py index d2300bc3b..3b0729c36 100644 --- a/zappa/wsgi.py +++ b/zappa/wsgi.py @@ -1,9 +1,9 @@ import base64 import logging import sys +from io import BytesIO from urllib.parse import urlencode -import six from requestlogger import ApacheFormatter from werkzeug import urls @@ -25,26 +25,26 @@ def create_wsgi_request( Given some event_info via API Gateway, create and return a valid WSGI request environ. """ - method = event_info["httpMethod"] + method = event_info.get("httpMethod", None) headers = merge_headers(event_info) or {} # Allow for the AGW console 'Test' button to work (Pull #735) - """ - API Gateway and ALB both started allowing for multi-value querystring - params in Nov. 2018. If there aren't multi-value params present, then - it acts identically to 'queryStringParameters', so we can use it as a - drop-in replacement. - - The one caveat here is that ALB will only include _one_ of - queryStringParameters _or_ multiValueQueryStringParameters, which means - we have to check for the existence of one and then fall back to the - other. - """ + # API Gateway and ALB both started allowing for multi-value querystring + # params in Nov. 2018. If there aren't multi-value params present, then + # it acts identically to 'queryStringParameters', so we can use it as a + # drop-in replacement. + # + # The one caveat here is that ALB will only include _one_ of + # queryStringParameters _or_ multiValueQueryStringParameters, which means + # we have to check for the existence of one and then fall back to the + # other. + if "multiValueQueryStringParameters" in event_info: query = event_info["multiValueQueryStringParameters"] query_string = urlencode(query, doseq=True) if query else "" else: query = event_info.get("queryStringParameters", {}) query_string = urlencode(query) if query else "" + query_string = urls.url_unquote(query_string) if context_header_mappings: for key, value in context_header_mappings.items(): @@ -59,13 +59,6 @@ def create_wsgi_request( if header_val is not None: headers[key] = header_val - # Extract remote user from context if Authorizer is enabled - remote_user = None - if event_info["requestContext"].get("authorizer"): - remote_user = event_info["requestContext"]["authorizer"].get("principalId") - elif event_info["requestContext"].get("identity"): - remote_user = event_info["requestContext"]["identity"].get("userArn") - # Related: https://github.com/Miserlou/Zappa/issues/677 # https://github.com/Miserlou/Zappa/issues/683 # https://github.com/Miserlou/Zappa/issues/696 @@ -77,12 +70,12 @@ def create_wsgi_request( body = base64.b64decode(encoded_body) else: body = event_info["body"] - if isinstance(body, six.string_types): + if isinstance(body, str): body = body.encode("utf-8") else: body = event_info["body"] - if isinstance(body, six.string_types): + if isinstance(body, str): body = body.encode("utf-8") # Make header names canonical, e.g. content-type => Content-Type @@ -100,7 +93,8 @@ def create_wsgi_request( if "," in x_forwarded_for: # The last one is the cloudfront proxy ip. The second to last is the real client ip. # Everything else is user supplied and untrustworthy. - remote_addr = x_forwarded_for.split(", ")[-2] + addresses = [addr.strip() for addr in x_forwarded_for.split(",")] + remote_addr = addresses[-2] else: remote_addr = x_forwarded_for or "127.0.0.1" @@ -122,13 +116,24 @@ def create_wsgi_request( "wsgi.run_once": False, } + # Systems calling the Lambda (other than API Gateway) may not provide the field requestContext + # Extract remote_user, authorizer if Authorizer is enabled + remote_user = None + if "requestContext" in event_info: + authorizer = event_info["requestContext"].get("authorizer", None) + if authorizer: + remote_user = authorizer.get("principalId") + environ["API_GATEWAY_AUTHORIZER"] = authorizer + elif event_info["requestContext"].get("identity"): + remote_user = event_info["requestContext"]["identity"].get("userArn") + # Input processing if method in ["POST", "PUT", "PATCH", "DELETE"]: if "Content-Type" in headers: environ["CONTENT_TYPE"] = headers["Content-Type"] # This must be Bytes or None - environ["wsgi.input"] = six.BytesIO(body) + environ["wsgi.input"] = BytesIO(body) if body: environ["CONTENT_LENGTH"] = str(len(body)) else: @@ -148,9 +153,6 @@ def create_wsgi_request( if remote_user: environ["REMOTE_USER"] = remote_user - if event_info["requestContext"].get("authorizer"): - environ["API_GATEWAY_AUTHORIZER"] = event_info["requestContext"]["authorizer"] - return environ