From 4cc52dbbd163234a5a1d2b06dfd4e8f2b02e4dae Mon Sep 17 00:00:00 2001
From: Dan Norman
Date: Tue, 21 Nov 2023 17:34:26 -0700
Subject: [PATCH] test: Benchmarks for record datetime parsing (#2054)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* added benchmarks for datetime parsing

* update comments

* changed record from fixture to function variables

* refactored to use itertools.repeat and dict.copy()

---------

Co-authored-by: Edgar Ramírez Mondragón <16805946+edgarrmondragon@users.noreply.github.com>
---
 tests/core/sinks/test_validation.py | 60 +++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/tests/core/sinks/test_validation.py b/tests/core/sinks/test_validation.py
index 5a7ca39a3..a539949cd 100644
--- a/tests/core/sinks/test_validation.py
+++ b/tests/core/sinks/test_validation.py
@@ -1,6 +1,9 @@
 from __future__ import annotations
 
 import datetime
+import itertools
+
+import pytest
 
 from tests.conftest import BatchSinkMock, TargetMock
 
@@ -39,3 +42,60 @@ def test_validate_record():
     )
     assert updated_record["missing_datetime"] == "2021-01-01T00:00:00+00:00"
     assert updated_record["invalid_datetime"] == "9999-12-31 23:59:59.999999"
+
+
+@pytest.fixture
+def bench_sink() -> BatchSinkMock:
+    target = TargetMock()
+    return BatchSinkMock(
+        target,
+        "users",
+        {
+            "type": "object",
+            "properties": {
+                "id": {"type": "integer"},
+                "created_at": {"type": "string", "format": "date-time"},
+                "updated_at": {"type": "string", "format": "date-time"},
+                "deleted_at": {"type": "string", "format": "date-time"},
+            },
+        },
+        ["id"],
+    )
+
+
+@pytest.fixture
+def bench_record():
+    return {
+        "id": 1,
+        "created_at": "2021-01-01T00:08:00-07:00",
+        "updated_at": "2022-01-02T00:09:00-07:00",
+        "deleted_at": "2023-01-03T00:10:00.0000",
+    }
+
+
+def test_bench_parse_timestamps_in_record(benchmark, bench_sink, bench_record):
+    """Run benchmark for Sink method _parse_timestamps_in_record."""
+    number_of_runs = 10000
+
+    sink: BatchSinkMock = bench_sink
+
+    def run_parse_timestamps_in_record():
+        for record in itertools.repeat(bench_record, number_of_runs):
+            _ = sink._parse_timestamps_in_record(
+                record.copy(), sink.schema, sink.datetime_error_treatment
+            )
+
+    benchmark(run_parse_timestamps_in_record)
+
+
+def test_bench_validate_and_parse(benchmark, bench_sink, bench_record):
+    """Run benchmark for Sink method _validate_and_parse."""
+    number_of_runs = 10000
+
+    sink: BatchSinkMock = bench_sink
+
+    def run_validate_and_parse():
+        for record in itertools.repeat(bench_record, number_of_runs):
+            _ = sink._validate_and_parse(record.copy())
+
+    benchmark(run_validate_and_parse)