Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(idempotency): sorting keys before hashing #639

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def _generate_hash(self, data: Any) -> str:

"""
data = getattr(data, "raw_event", data) # could be a data class depending on decorator order
hashed_data = self.hash_function(json.dumps(data, cls=Encoder).encode())
hashed_data = self.hash_function(json.dumps(data, cls=Encoder, sort_keys=True).encode())
return hashed_data.hexdigest()

def _validate_payload(self, data: Dict[str, Any], data_record: DataRecord) -> None:
Expand Down Expand Up @@ -310,7 +310,7 @@ def save_success(self, data: Dict[str, Any], result: dict) -> None:
result: dict
The response from function
"""
response_data = json.dumps(result, cls=Encoder)
response_data = json.dumps(result, cls=Encoder, sort_keys=True)

data_record = DataRecord(
idempotency_key=self._get_hashed_idempotency_key(data=data),
Expand Down
14 changes: 9 additions & 5 deletions tests/functional/idempotency/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
TABLE_NAME = "TEST_TABLE"


def serialize(data):
    """Return ``data`` as a JSON string with dictionary keys sorted.

    Test helper that calls ``json.dumps`` with ``sort_keys=True`` and the
    ``Encoder`` class in scope, so expected values built in tests do not
    depend on the insertion order of dictionary keys.
    """
    dump_options = {"sort_keys": True, "cls": Encoder}
    return json.dumps(data, **dump_options)


@pytest.fixture(scope="module")
def config() -> Config:
    """Module-scoped fixture returning a ``Config`` built with region ``us-east-1``."""
    region_pinned = Config(region_name="us-east-1")
    return region_pinned
Expand Down Expand Up @@ -62,12 +66,12 @@ def lambda_response():

@pytest.fixture(scope="module")
def serialized_lambda_response(lambda_response):
return json.dumps(lambda_response, cls=Encoder)
return serialize(lambda_response)


@pytest.fixture(scope="module")
def deserialized_lambda_response(lambda_response):
return json.loads(json.dumps(lambda_response, cls=Encoder))
return json.loads(serialize(lambda_response))


@pytest.fixture
Expand Down Expand Up @@ -144,20 +148,20 @@ def expected_params_put_item_with_validation(hashed_idempotency_key, hashed_vali
def hashed_idempotency_key(lambda_apigw_event, default_jmespath, lambda_context):
compiled_jmespath = jmespath.compile(default_jmespath)
data = compiled_jmespath.search(lambda_apigw_event)
return "test-func#" + hashlib.md5(json.dumps(data).encode()).hexdigest()
return "test-func#" + hashlib.md5(serialize(data).encode()).hexdigest()


@pytest.fixture
def hashed_idempotency_key_with_envelope(lambda_apigw_event):
event = extract_data_from_envelope(
data=lambda_apigw_event, envelope=envelopes.API_GATEWAY_HTTP, jmespath_options={}
)
return "test-func#" + hashlib.md5(json.dumps(event).encode()).hexdigest()
return "test-func#" + hashlib.md5(serialize(event).encode()).hexdigest()


@pytest.fixture
def hashed_validation_key(lambda_apigw_event):
return hashlib.md5(json.dumps(lambda_apigw_event["requestContext"]).encode()).hexdigest()
return hashlib.md5(serialize(lambda_apigw_event["requestContext"]).encode()).hexdigest()


@pytest.fixture
Expand Down
34 changes: 26 additions & 8 deletions tests/functional/idempotency/test_idempotency.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from aws_lambda_powertools.utilities.idempotency.idempotency import idempotent, idempotent_function
from aws_lambda_powertools.utilities.idempotency.persistence.base import BasePersistenceLayer, DataRecord
from aws_lambda_powertools.utilities.validation import envelopes, validator
from tests.functional.idempotency.conftest import serialize
from tests.functional.utils import load_event

TABLE_NAME = "TEST_TABLE"
Expand Down Expand Up @@ -741,7 +742,7 @@ def test_default_no_raise_on_missing_idempotency_key(
hashed_key = persistence_store._get_hashed_idempotency_key({})

# THEN return the hash of None
expected_value = "test-func#" + md5(json.dumps(None).encode()).hexdigest()
expected_value = "test-func#" + md5(serialize(None).encode()).hexdigest()
assert expected_value == hashed_key


Expand Down Expand Up @@ -785,7 +786,7 @@ def test_jmespath_with_powertools_json(
expected_value = [sub_attr_value, key_attr_value]
api_gateway_proxy_event = {
"requestContext": {"authorizer": {"claims": {"sub": sub_attr_value}}},
"body": json.dumps({"id": key_attr_value}),
"body": serialize({"id": key_attr_value}),
}

# WHEN calling _get_hashed_idempotency_key
Expand Down Expand Up @@ -869,7 +870,7 @@ def _delete_record(self, data_record: DataRecord) -> None:
def test_idempotent_lambda_event_source(lambda_context):
# Scenario to validate that we can use the event_source decorator before or after the idempotent decorator
mock_event = load_event("apiGatewayProxyV2Event.json")
persistence_layer = MockPersistenceLayer("test-func#" + hashlib.md5(json.dumps(mock_event).encode()).hexdigest())
persistence_layer = MockPersistenceLayer("test-func#" + hashlib.md5(serialize(mock_event).encode()).hexdigest())
expected_result = {"message": "Foo"}

# GIVEN an event_source decorator
Expand All @@ -889,7 +890,7 @@ def lambda_handler(event, _):
def test_idempotent_function():
# Scenario to validate we can use idempotent_function with any function
mock_event = {"data": "value"}
persistence_layer = MockPersistenceLayer("test-func#" + hashlib.md5(json.dumps(mock_event).encode()).hexdigest())
persistence_layer = MockPersistenceLayer("test-func#" + hashlib.md5(serialize(mock_event).encode()).hexdigest())
expected_result = {"message": "Foo"}

@idempotent_function(persistence_store=persistence_layer, data_keyword_argument="record")
Expand All @@ -906,7 +907,7 @@ def test_idempotent_function_arbitrary_args_kwargs():
# Scenario to validate we can use idempotent_function with a function
# with an arbitrary number of args and kwargs
mock_event = {"data": "value"}
persistence_layer = MockPersistenceLayer("test-func#" + hashlib.md5(json.dumps(mock_event).encode()).hexdigest())
persistence_layer = MockPersistenceLayer("test-func#" + hashlib.md5(serialize(mock_event).encode()).hexdigest())
expected_result = {"message": "Foo"}

@idempotent_function(persistence_store=persistence_layer, data_keyword_argument="record")
Expand All @@ -921,7 +922,7 @@ def record_handler(arg_one, arg_two, record, is_record):

def test_idempotent_function_invalid_data_kwarg():
mock_event = {"data": "value"}
persistence_layer = MockPersistenceLayer("test-func#" + hashlib.md5(json.dumps(mock_event).encode()).hexdigest())
persistence_layer = MockPersistenceLayer("test-func#" + hashlib.md5(serialize(mock_event).encode()).hexdigest())
expected_result = {"message": "Foo"}
keyword_argument = "payload"

Expand All @@ -938,7 +939,7 @@ def record_handler(record):

def test_idempotent_function_arg_instead_of_kwarg():
mock_event = {"data": "value"}
persistence_layer = MockPersistenceLayer("test-func#" + hashlib.md5(json.dumps(mock_event).encode()).hexdigest())
persistence_layer = MockPersistenceLayer("test-func#" + hashlib.md5(serialize(mock_event).encode()).hexdigest())
expected_result = {"message": "Foo"}
keyword_argument = "record"

Expand All @@ -956,7 +957,7 @@ def record_handler(record):
def test_idempotent_function_and_lambda_handler(lambda_context):
# Scenario to validate we can use both idempotent_function and idempotent decorators
mock_event = {"data": "value"}
persistence_layer = MockPersistenceLayer("test-func#" + hashlib.md5(json.dumps(mock_event).encode()).hexdigest())
persistence_layer = MockPersistenceLayer("test-func#" + hashlib.md5(serialize(mock_event).encode()).hexdigest())
expected_result = {"message": "Foo"}

@idempotent_function(persistence_store=persistence_layer, data_keyword_argument="record")
Expand All @@ -976,3 +977,20 @@ def lambda_handler(event, _):
# THEN we expect the function and lambda handler to execute successfully
assert fn_result == expected_result
assert handler_result == expected_result


def test_idempotent_data_sorting():
    # Scenario to validate same data in different order hashes to the same idempotency key
    data_one = {"data": "test message 1", "more_data": "more data 1"}
    data_two = {"more_data": "more data 1", "data": "test message 1"}

    # The expected key is derived from data_one using the sorted-key `serialize` helper
    # (the same helper the other tests in this module use), so the expectation does not
    # silently depend on the key order of the literal above.
    # Assertion will happen in MockPersistenceLayer
    expected_key = "test-func#" + hashlib.md5(serialize(data_one).encode()).hexdigest()
    persistence_layer = MockPersistenceLayer(expected_key)

    # GIVEN a function made idempotent on its `payload` keyword argument
    @idempotent_function(data_keyword_argument="payload", persistence_store=persistence_layer)
    def dummy(payload):
        return {"message": "hello"}

    # WHEN it is called with the same data in a different key order
    dummy(payload=data_two)