Skip to content

Commit

Permalink
test: Benchmarks for record datetime parsing (#2054)
Browse files Browse the repository at this point in the history
* added benchmarks for datetime parsing

* update comments

* changed record from fixture to function variables

* refactored to use itertools.repeat and dict.copy()

---------

Co-authored-by: Edgar Ramírez Mondragón <[email protected]>
  • Loading branch information
BuzzCutNorman and edgarrmondragon authored Nov 22, 2023
1 parent f96df37 commit 4cc52db
Showing 1 changed file with 60 additions and 0 deletions.
60 changes: 60 additions & 0 deletions tests/core/sinks/test_validation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

import datetime
import itertools

import pytest

from tests.conftest import BatchSinkMock, TargetMock

Expand Down Expand Up @@ -39,3 +42,60 @@ def test_validate_record():
)
assert updated_record["missing_datetime"] == "2021-01-01T00:00:00+00:00"
assert updated_record["invalid_datetime"] == "9999-12-31 23:59:59.999999"


@pytest.fixture
def bench_sink() -> BatchSinkMock:
    """Provide a ``BatchSinkMock`` whose schema has three date-time fields.

    The schema describes an object with an integer ``id`` and the string
    properties ``created_at``, ``updated_at`` and ``deleted_at``, each
    declared with ``"format": "date-time"``.
    """
    # Each timestamp property gets its own dict so no schema node is shared.
    timestamp_properties = {
        field: {"type": "string", "format": "date-time"}
        for field in ("created_at", "updated_at", "deleted_at")
    }
    schema = {
        "type": "object",
        "properties": {"id": {"type": "integer"}, **timestamp_properties},
    }
    # NOTE(review): "users" and ["id"] look like the stream name and key
    # properties -- confirm against the BatchSinkMock constructor.
    return BatchSinkMock(TargetMock(), "users", schema, ["id"])


@pytest.fixture
def bench_record():
    """Provide the sample record fed to the benchmark tests.

    ``created_at`` and ``updated_at`` carry a ``-07:00`` offset, while
    ``deleted_at`` has fractional seconds and no offset.
    """
    return dict(
        id=1,
        created_at="2021-01-01T00:08:00-07:00",
        updated_at="2022-01-02T00:09:00-07:00",
        deleted_at="2023-01-03T00:10:00.0000",
    )


def test_bench_parse_timestamps_in_record(benchmark, bench_sink, bench_record):
    """Benchmark repeated calls to the sink's ``_parse_timestamps_in_record``.

    Each invocation of the benchmarked callable makes 10000 calls, every one
    receiving its own shallow copy of ``bench_record`` together with the
    sink's schema and datetime error treatment.
    """
    iterations = 10000

    def parse_batch():
        # A fresh ``repeat`` iterator is created per invocation, so the
        # callable performs the same work every time it is run.
        for template in itertools.repeat(bench_record, iterations):
            # Hand over a copy rather than the fixture dict itself
            # (presumably the method may modify the record it is given).
            bench_sink._parse_timestamps_in_record(
                template.copy(),
                bench_sink.schema,
                bench_sink.datetime_error_treatment,
            )

    benchmark(parse_batch)


def test_bench_validate_and_parse(benchmark, bench_sink, bench_record):
    """Benchmark repeated calls to the sink's ``_validate_and_parse``.

    Each invocation of the benchmarked callable makes 10000 calls, every one
    receiving its own shallow copy of ``bench_record``.
    """
    iterations = 10000

    def validate_batch():
        # A fresh ``repeat`` iterator is created per invocation, so the
        # callable performs the same work every time it is run.
        for template in itertools.repeat(bench_record, iterations):
            # Hand over a copy rather than the fixture dict itself
            # (presumably the method may modify the record it is given).
            bench_sink._validate_and_parse(template.copy())

    benchmark(validate_batch)

0 comments on commit 4cc52db

Please sign in to comment.