diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 45e4766b6c..87ce29c457 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -26,7 +26,7 @@ jobs: matrix: os: - ubuntu-latest - python-version: ["3.8.x", "3.9.x", "3.10.x", "3.11.x"] + python-version: ["3.9.x", "3.10.x", "3.11.x"] defaults: run: @@ -49,6 +49,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_airflow.yml b/.github/workflows/test_airflow.yml index 2a96c4475e..4e0453b62f 100644 --- a/.github/workflows/test_airflow.yml +++ b/.github/workflows/test_airflow.yml @@ -37,6 +37,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_build_images.yml b/.github/workflows/test_build_images.yml index 665f8b2509..c5c3704f92 100644 --- a/.github/workflows/test_build_images.yml +++ b/.github/workflows/test_build_images.yml @@ -37,6 +37,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Build images run: make test-build-images diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 359ed43095..d13440d89a 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -37,8 +37,6 @@ jobs: python-version: ["3.11.x"] # Test all python versions on ubuntu only include: - - python-version: "3.8.x" - os: "ubuntu-latest" - python-version: "3.9.x" os: "ubuntu-latest" - python-version: "3.10.x" @@ -79,6 +77,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 # NOTE: do not cache. we want to have a clean state each run and we upgrade depdendencies later # - name: Load cached venv diff --git a/.github/workflows/test_dbt_cloud.yml b/.github/workflows/test_dbt_cloud.yml index 5b57dc77c3..d88c1b914d 100644 --- a/.github/workflows/test_dbt_cloud.yml +++ b/.github/workflows/test_dbt_cloud.yml @@ -52,6 +52,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_dbt_runner.yml b/.github/workflows/test_dbt_runner.yml index ad29909d9a..9b6ced0d1a 100644 --- a/.github/workflows/test_dbt_runner.yml +++ b/.github/workflows/test_dbt_runner.yml @@ -49,6 +49,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destination_athena.yml b/.github/workflows/test_destination_athena.yml index 03eb7f9434..b808d5eaa6 100644 --- a/.github/workflows/test_destination_athena.yml +++ b/.github/workflows/test_destination_athena.yml @@ -56,6 +56,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destination_athena_iceberg.yml b/.github/workflows/test_destination_athena_iceberg.yml index 3412e789e3..577fa030a1 100644 --- a/.github/workflows/test_destination_athena_iceberg.yml +++ b/.github/workflows/test_destination_athena_iceberg.yml @@ -56,6 +56,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destination_bigquery.yml b/.github/workflows/test_destination_bigquery.yml index eb8b63f757..1f438e2dde 100644 --- a/.github/workflows/test_destination_bigquery.yml +++ b/.github/workflows/test_destination_bigquery.yml @@ -55,6 +55,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destination_clickhouse.yml b/.github/workflows/test_destination_clickhouse.yml index 46464ea462..8a987ba676 100644 --- a/.github/workflows/test_destination_clickhouse.yml +++ b/.github/workflows/test_destination_clickhouse.yml @@ -52,6 +52,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destination_databricks.yml b/.github/workflows/test_destination_databricks.yml index c1609de863..98c48b0bc9 100644 --- a/.github/workflows/test_destination_databricks.yml +++ b/.github/workflows/test_destination_databricks.yml @@ -55,6 +55,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destination_dremio.yml b/.github/workflows/test_destination_dremio.yml index 4bc48c54db..e16496b725 100644 --- a/.github/workflows/test_destination_dremio.yml +++ b/.github/workflows/test_destination_dremio.yml @@ -56,6 +56,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destination_lancedb.yml b/.github/workflows/test_destination_lancedb.yml index 6be89d3de3..960d0cacf9 100644 --- a/.github/workflows/test_destination_lancedb.yml +++ b/.github/workflows/test_destination_lancedb.yml @@ -53,6 +53,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destination_motherduck.yml b/.github/workflows/test_destination_motherduck.yml index db81131266..20ab470aa9 100644 --- a/.github/workflows/test_destination_motherduck.yml +++ b/.github/workflows/test_destination_motherduck.yml @@ -55,6 +55,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destination_mssql.yml b/.github/workflows/test_destination_mssql.yml index 6fdd7a5bc5..14da512a79 100644 --- a/.github/workflows/test_destination_mssql.yml +++ b/.github/workflows/test_destination_mssql.yml @@ -60,6 +60,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destination_qdrant.yml b/.github/workflows/test_destination_qdrant.yml index c35a171bce..01f9cc44d6 100644 --- a/.github/workflows/test_destination_qdrant.yml +++ b/.github/workflows/test_destination_qdrant.yml @@ -54,6 +54,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destination_snowflake.yml b/.github/workflows/test_destination_snowflake.yml index 73a2a8f6e7..29556e7eeb 100644 --- a/.github/workflows/test_destination_snowflake.yml +++ b/.github/workflows/test_destination_snowflake.yml @@ -55,6 +55,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destination_synapse.yml b/.github/workflows/test_destination_synapse.yml index 8f6bf1eb29..69c0cda93f 100644 --- a/.github/workflows/test_destination_synapse.yml +++ b/.github/workflows/test_destination_synapse.yml @@ -58,6 +58,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_destinations.yml b/.github/workflows/test_destinations.yml index a9306c2f9c..dbc3ca0c5a 100644 --- a/.github/workflows/test_destinations.yml +++ b/.github/workflows/test_destinations.yml @@ -67,6 +67,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 # - name: Load cached venv # id: cached-poetry-dependencies diff --git a/.github/workflows/test_doc_snippets.yml b/.github/workflows/test_doc_snippets.yml index ae06a72df9..e8a8fcb7b7 100644 --- a/.github/workflows/test_doc_snippets.yml +++ b/.github/workflows/test_doc_snippets.yml @@ -78,6 +78,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_local_destinations.yml b/.github/workflows/test_local_destinations.yml index 706bae1b0c..437d30aef6 100644 --- a/.github/workflows/test_local_destinations.yml +++ b/.github/workflows/test_local_destinations.yml @@ -86,6 +86,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_local_sources.yml b/.github/workflows/test_local_sources.yml index 39689f5c85..95a7023dbf 100644 --- a/.github/workflows/test_local_sources.yml +++ b/.github/workflows/test_local_sources.yml @@ -73,6 +73,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/.github/workflows/test_sqlalchemy_destinations.yml b/.github/workflows/test_sqlalchemy_destinations.yml index 1f00373674..58348674e0 100644 --- a/.github/workflows/test_sqlalchemy_destinations.yml +++ b/.github/workflows/test_sqlalchemy_destinations.yml @@ -77,6 +77,7 @@ jobs: virtualenvs-create: true virtualenvs-in-project: true installer-parallel: true + version: 1.8.5 - name: Load cached venv id: cached-poetry-dependencies diff --git a/dlt/cli/pipeline_files.py b/dlt/cli/pipeline_files.py index c0139fe2a7..0ff834c12f 100644 --- a/dlt/cli/pipeline_files.py +++ b/dlt/cli/pipeline_files.py @@ -4,11 +4,12 @@ import yaml import posixpath from pathlib import Path -from typing import Dict, NamedTuple, Sequence, Tuple, TypedDict, List, Literal +from typing import Dict, NamedTuple, Sequence, Tuple, List, Literal from dlt.cli.exceptions import VerifiedSourceRepoError from dlt.common import git from dlt.common.storages import FileStorage +from dlt.common.typing import TypedDict from dlt.common.reflection.utils import get_module_docstring diff --git a/dlt/common/exceptions.py b/dlt/common/exceptions.py index 6a86ab5fbe..5ee40341f5 100644 --- a/dlt/common/exceptions.py +++ b/dlt/common/exceptions.py @@ -1,4 +1,5 @@ -from typing import Any, AnyStr, Dict, List, Sequence, Optional, Iterable, Type, TypedDict +from typing import Any, AnyStr, Dict, List, Sequence, Optional, Type +from dlt.common.typing import TypedDict class ExceptionTrace(TypedDict, total=False): diff --git a/dlt/common/incremental/typing.py b/dlt/common/incremental/typing.py index 2ca981bff0..acffdf5cd1 100644 --- a/dlt/common/incremental/typing.py +++ b/dlt/common/incremental/typing.py @@ -1,8 +1,6 @@ -from typing_extensions import TypedDict - from typing import Any, Callable, List, Literal, Optional, Sequence, TypeVar, Union -from dlt.common.typing import TSortOrder, TTableHintTemplate, TColumnNames +from dlt.common.typing import TSortOrder, TTableHintTemplate, TColumnNames, TypedDict TCursorValue = TypeVar("TCursorValue", bound=Any) LastValueFunc = Callable[[Sequence[TCursorValue]], Any] diff --git a/dlt/common/libs/pyarrow.py b/dlt/common/libs/pyarrow.py index 255fcd344e..c7ef450844 100644 --- a/dlt/common/libs/pyarrow.py +++ b/dlt/common/libs/pyarrow.py @@ -462,16 +462,8 @@ def to_arrow_scalar(value: Any, arrow_type: pyarrow.DataType) -> Any: def from_arrow_scalar(arrow_value: pyarrow.Scalar) -> Any: - """Converts arrow scalar into Python type. Currently adds "UTC" to naive date times and converts all others to UTC""" - row_value = arrow_value.as_py() - # dates are not represented as datetimes but I see connector-x represents - # datetimes as dates and keeping the exact time inside. probably a bug - # but can be corrected this way - if isinstance(row_value, date) and not isinstance(row_value, datetime): - row_value = pendulum.from_timestamp(arrow_value.cast(pyarrow.int64()).as_py() / 1000) - elif isinstance(row_value, datetime): - row_value = pendulum.instance(row_value).in_tz("UTC") - return row_value + """Converts arrow scalar into Python type.""" + return arrow_value.as_py() TNewColumns = Sequence[Tuple[int, pyarrow.Field, Callable[[pyarrow.Table], Iterable[Any]]]] diff --git a/dlt/common/libs/pydantic.py b/dlt/common/libs/pydantic.py index df3554ff21..d4d78cd8eb 100644 --- a/dlt/common/libs/pydantic.py +++ b/dlt/common/libs/pydantic.py @@ -6,7 +6,6 @@ Generic, Optional, Set, - TypedDict, List, Type, Union, @@ -15,6 +14,7 @@ ) from typing_extensions import Annotated, get_args, get_origin +from dlt.common.typing import TypedDict from dlt.common.data_types import py_type_to_sc_type from dlt.common.exceptions import MissingDependencyException from dlt.common.schema import DataValidationError diff --git a/dlt/common/metrics.py b/dlt/common/metrics.py index 2f9f574dd0..7cccf47263 100644 --- a/dlt/common/metrics.py +++ b/dlt/common/metrics.py @@ -1,5 +1,6 @@ import datetime # noqa: I251 -from typing import Any, Dict, List, NamedTuple, Optional, Tuple, TypedDict # noqa: 251 +from typing import Any, Dict, List, NamedTuple, Optional, Tuple # noqa: 251 +from dlt.common.typing import TypedDict class DataWriterMetrics(NamedTuple): diff --git a/dlt/common/normalizers/json/relational.py b/dlt/common/normalizers/json/relational.py index 36845b2e14..d243ed9578 100644 --- a/dlt/common/normalizers/json/relational.py +++ b/dlt/common/normalizers/json/relational.py @@ -8,10 +8,10 @@ Tuple, Type, cast, - TypedDict, Any, ) +from dlt.common.typing import TypedDict from dlt.common.normalizers.exceptions import InvalidJsonNormalizer from dlt.common.normalizers.typing import TJSONNormalizer from dlt.common.normalizers.utils import generate_dlt_id diff --git a/dlt/common/normalizers/typing.py b/dlt/common/normalizers/typing.py index 16ad097fde..5f0c4c55b0 100644 --- a/dlt/common/normalizers/typing.py +++ b/dlt/common/normalizers/typing.py @@ -1,7 +1,7 @@ -from typing import List, Optional, Type, TypedDict, Literal, Union +from typing import List, Optional, Type, Literal, Union from types import ModuleType -from dlt.common.typing import StrAny +from dlt.common.typing import StrAny, TypedDict from dlt.common.normalizers.naming import NamingConvention TNamingConventionReferenceArg = Union[str, Type[NamingConvention], ModuleType] diff --git a/dlt/common/pipeline.py b/dlt/common/pipeline.py index 9d3d5792ea..a23306375d 100644 --- a/dlt/common/pipeline.py +++ b/dlt/common/pipeline.py @@ -18,12 +18,12 @@ Sequence, Tuple, TypeVar, - TypedDict, Mapping, Literal, ) from typing_extensions import NotRequired +from dlt.common.typing import TypedDict from dlt.common.configuration import configspec from dlt.common.configuration import known_sections from dlt.common.configuration.container import Container diff --git a/dlt/common/runtime/typing.py b/dlt/common/runtime/typing.py index 9a4d0f3d48..8ab24bcf3a 100644 --- a/dlt/common/runtime/typing.py +++ b/dlt/common/runtime/typing.py @@ -1,9 +1,8 @@ from typing import ( List, Literal, - TypedDict, ) - +from dlt.common.typing import TypedDict TExecInfoNames = Literal[ "kubernetes", diff --git a/dlt/common/schema/typing.py b/dlt/common/schema/typing.py index 6f5d6213c9..c26a525cc2 100644 --- a/dlt/common/schema/typing.py +++ b/dlt/common/schema/typing.py @@ -10,13 +10,13 @@ Set, Tuple, Type, - TypedDict, NewType, Union, get_args, ) from typing_extensions import Never +from dlt.common.typing import TypedDict from dlt.common.data_types import TDataType from dlt.common.normalizers.typing import TNormalizersConfig from dlt.common.typing import TSortOrder, TAnyDateTime, TLoaderFileFormat, TColumnNames diff --git a/dlt/common/storages/fsspec_filesystem.py b/dlt/common/storages/fsspec_filesystem.py index fb929031a1..f2e18ef584 100644 --- a/dlt/common/storages/fsspec_filesystem.py +++ b/dlt/common/storages/fsspec_filesystem.py @@ -9,7 +9,6 @@ Literal, cast, Tuple, - TypedDict, Optional, Union, Iterator, @@ -25,6 +24,7 @@ from fsspec.core import url_to_fs from dlt import version +from dlt.common.typing import TypedDict from dlt.common.pendulum import pendulum from dlt.common.configuration.specs import ( GcpCredentials, diff --git a/dlt/common/storages/load_package.py b/dlt/common/storages/load_package.py index d569fbe662..0e1c0ddf9c 100644 --- a/dlt/common/storages/load_package.py +++ b/dlt/common/storages/load_package.py @@ -21,10 +21,10 @@ cast, Any, Tuple, - TypedDict, ) from typing_extensions import NotRequired +from dlt.common.typing import TypedDict from dlt.common.pendulum import pendulum from dlt.common.json import json from dlt.common.configuration import configspec diff --git a/dlt/common/typing.py b/dlt/common/typing.py index 9866b72ed1..9a96248ba7 100644 --- a/dlt/common/typing.py +++ b/dlt/common/typing.py @@ -45,6 +45,8 @@ from typing_extensions import is_typeddict as _is_typeddict +from typing_extensions import TypedDict # noqa: I251 + try: from types import UnionType # type: ignore[attr-defined] except ImportError: diff --git a/dlt/common/versioned_state.py b/dlt/common/versioned_state.py index 52a26c6943..cc30c9c0b9 100644 --- a/dlt/common/versioned_state.py +++ b/dlt/common/versioned_state.py @@ -2,11 +2,12 @@ import hashlib import binascii from copy import copy -from typing import TypedDict, List, Tuple, Mapping +from typing import List, Tuple, Mapping from dlt.common.json import json from dlt.common.typing import DictStrAny from dlt.common.utils import compressed_b64decode, compressed_b64encode +from dlt.common.typing import TypedDict class TVersionedState(TypedDict, total=False): diff --git a/dlt/destinations/sql_client.py b/dlt/destinations/sql_client.py index cf9cdf0c40..5be03b816a 100644 --- a/dlt/destinations/sql_client.py +++ b/dlt/destinations/sql_client.py @@ -17,11 +17,10 @@ AnyStr, List, Generator, - TypedDict, cast, ) -from dlt.common.typing import TFun +from dlt.common.typing import TFun, TypedDict from dlt.common.schema.typing import TTableSchemaColumns from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.utils import concat_strings_with_limit diff --git a/dlt/destinations/sql_jobs.py b/dlt/destinations/sql_jobs.py index 9e4ee48191..e6853700b9 100644 --- a/dlt/destinations/sql_jobs.py +++ b/dlt/destinations/sql_jobs.py @@ -1,9 +1,10 @@ -from typing import Any, Dict, List, Sequence, Tuple, cast, TypedDict, Optional, Callable, Union +from typing import Any, Dict, List, Sequence, Tuple, cast, Optional, Callable, Union import yaml from dlt.common.time import ensure_pendulum_datetime from dlt.common.destination import PreparedTableSchema from dlt.common.destination.utils import resolve_merge_strategy +from dlt.common.typing import TypedDict from dlt.common.schema.typing import ( TSortOrder, diff --git a/dlt/extract/hints.py b/dlt/extract/hints.py index 22a0062acf..f4382f0cb6 100644 --- a/dlt/extract/hints.py +++ b/dlt/extract/hints.py @@ -1,4 +1,4 @@ -from typing import TypedDict, cast, Any, Optional, Dict, Sequence, Mapping, Union +from typing import cast, Any, Optional, Dict, Sequence, Mapping, Union from typing_extensions import Self from dlt.common import logger @@ -19,6 +19,8 @@ MERGE_STRATEGIES, TTableReferenceParam, ) + +from dlt.common.typing import TypedDict from dlt.common.schema.utils import ( DEFAULT_WRITE_DISPOSITION, merge_column, diff --git a/dlt/extract/incremental/__init__.py b/dlt/extract/incremental/__init__.py index 40734095cf..6d77758d59 100644 --- a/dlt/extract/incremental/__init__.py +++ b/dlt/extract/incremental/__init__.py @@ -30,7 +30,6 @@ coerce_value, py_type_to_sc_type, ) -from dlt.common.utils import without_none from dlt.extract.exceptions import IncrementalUnboundError from dlt.extract.incremental.exceptions import ( diff --git a/dlt/extract/incremental/transform.py b/dlt/extract/incremental/transform.py index 1d213e26c2..a59675ce03 100644 --- a/dlt/extract/incremental/transform.py +++ b/dlt/extract/incremental/transform.py @@ -113,6 +113,19 @@ def __call__( row: TDataItem, ) -> Tuple[bool, bool, bool]: ... + @staticmethod + def _adapt_if_datetime(row_value: Any, last_value: Any) -> Any: + # For datetime cursor, ensure the value is a timezone aware datetime. + # The object saved in state will always be a tz aware pendulum datetime so this ensures values are comparable + if ( + isinstance(row_value, datetime) + and row_value.tzinfo is None + and isinstance(last_value, datetime) + and last_value.tzinfo is not None + ): + row_value = pendulum.instance(row_value).in_tz("UTC") + return row_value + @property def deduplication_disabled(self) -> bool: """Skip deduplication when length of the key is 0 or if lag is applied.""" @@ -185,19 +198,9 @@ def __call__( return None, False, False else: return row, False, False - last_value = self.last_value last_value_func = self.last_value_func - - # For datetime cursor, ensure the value is a timezone aware datetime. - # The object saved in state will always be a tz aware pendulum datetime so this ensures values are comparable - if ( - isinstance(row_value, datetime) - and row_value.tzinfo is None - and isinstance(last_value, datetime) - and last_value.tzinfo is not None - ): - row_value = pendulum.instance(row_value).in_tz("UTC") + row_value = self._adapt_if_datetime(row_value, last_value) # Check whether end_value has been reached # Filter end value ranges exclusively, so in case of "max" function we remove values >= end_value @@ -354,13 +357,8 @@ def __call__( # TODO: Json path support. For now assume the cursor_path is a column name cursor_path = self.cursor_path - - # The new max/min value try: - # NOTE: datetimes are always pendulum in UTC - row_value = from_arrow_scalar(self.compute(tbl[cursor_path])) cursor_data_type = tbl.schema.field(cursor_path).type - row_value_scalar = to_arrow_scalar(row_value, cursor_data_type) except KeyError as e: raise IncrementalCursorPathMissing( self.resource_name, @@ -371,6 +369,12 @@ def __call__( " must be a column name.", ) from e + # The new max/min value + row_value_scalar = self.compute( + tbl[cursor_path] + ) # to_arrow_scalar(row_value, cursor_data_type) + row_value = self._adapt_if_datetime(from_arrow_scalar(row_value_scalar), self.last_value) + if tbl.schema.field(cursor_path).nullable: tbl_without_null, tbl_with_null = self._process_null_at_cursor_path(tbl) tbl = tbl_without_null diff --git a/dlt/pipeline/drop.py b/dlt/pipeline/drop.py index cd982cf676..1e4788d08d 100644 --- a/dlt/pipeline/drop.py +++ b/dlt/pipeline/drop.py @@ -1,4 +1,4 @@ -from typing import Union, Iterable, Optional, List, Dict, Any, Tuple, TypedDict +from typing import Union, Iterable, Optional, List, Dict, Any, Tuple from copy import deepcopy from itertools import chain from dataclasses import dataclass @@ -12,6 +12,8 @@ reset_resource_state, _delete_source_state_keys, ) +from dlt.common.typing import TypedDict + from dlt.common.schema.typing import TSimpleRegex, TTableSchema from dlt.common.schema.utils import ( group_tables_by_resource, diff --git a/dlt/pipeline/platform.py b/dlt/pipeline/platform.py index e43e2f4dbe..5f30e579e5 100644 --- a/dlt/pipeline/platform.py +++ b/dlt/pipeline/platform.py @@ -1,5 +1,5 @@ """Implements SupportsTracking""" -from typing import Any, cast, TypedDict, List +from typing import Any, cast, List from requests import Session from dlt.common import logger @@ -7,6 +7,7 @@ from dlt.common.pipeline import LoadInfo from dlt.common.managed_thread_pool import ManagedThreadPool from dlt.common.schema.typing import TStoredSchema +from dlt.common.typing import TypedDict from dlt.pipeline.trace import PipelineTrace, PipelineStepTrace, TPipelineStep, SupportsPipeline diff --git a/dlt/sources/rest_api/typing.py b/dlt/sources/rest_api/typing.py index c48e54de4a..21de95f2ed 100644 --- a/dlt/sources/rest_api/typing.py +++ b/dlt/sources/rest_api/typing.py @@ -1,5 +1,4 @@ from dataclasses import dataclass, field -from typing_extensions import TypedDict from typing import ( Any, @@ -12,6 +11,7 @@ ) from dlt.common import jsonpath +from dlt.common.typing import TypedDict from dlt.common.schema.typing import ( TAnySchemaColumns, ) diff --git a/dlt/sources/sql_database/schema_types.py b/dlt/sources/sql_database/schema_types.py index 1cca0b81bb..b9cb29ef8a 100644 --- a/dlt/sources/sql_database/schema_types.py +++ b/dlt/sources/sql_database/schema_types.py @@ -7,14 +7,13 @@ List, Callable, Union, - TypedDict, Dict, ) from typing_extensions import TypeAlias from sqlalchemy.exc import NoReferencedTableError - +from dlt.common.typing import TypedDict from dlt.common.libs.sql_alchemy import Table, Column, Row, sqltypes, Select, TypeEngine from dlt.common import logger from dlt.common.schema.typing import TColumnSchema, TTableSchemaColumns, TTableReference diff --git a/docs/examples/archive/sources/singer_tap.py b/docs/examples/archive/sources/singer_tap.py index 3c733c33f1..0371e5b1a4 100644 --- a/docs/examples/archive/sources/singer_tap.py +++ b/docs/examples/archive/sources/singer_tap.py @@ -1,8 +1,9 @@ import os import tempfile -from typing import Any, Iterator, TypedDict, cast, Union +from typing import Any, Iterator, cast, Union import dlt +from dlt.common.typing import TypedDict from dlt.common import json from dlt.common.configuration.specs import BaseConfiguration from dlt.common.runners.venv import Venv diff --git a/docs/website/docs/dlt-ecosystem/destinations/mssql.md b/docs/website/docs/dlt-ecosystem/destinations/mssql.md index 9e830407dc..0ab03a3491 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/mssql.md +++ b/docs/website/docs/dlt-ecosystem/destinations/mssql.md @@ -76,6 +76,10 @@ You can place any ODBC-specific settings into the query string or **destination. destination.mssql.credentials="mssql://loader.database.windows.net/dlt_data?trusted_connection=yes" ``` +:::note +If you encounter missing credentials errors when using Windows authentication, set the 'username' and 'password' as empty strings in the TOML file. +::: + **To connect to a local SQL server instance running without SSL**, pass the `encrypt=no` parameter: ```toml destination.mssql.credentials="mssql://loader:loader@localhost/dlt_data?encrypt=no" diff --git a/tests/common/test_typing.py b/tests/common/test_typing.py index a298e7d50c..9107b25727 100644 --- a/tests/common/test_typing.py +++ b/tests/common/test_typing.py @@ -16,7 +16,6 @@ NewType, Sequence, TypeVar, - TypedDict, Optional, Union, ) @@ -48,6 +47,7 @@ is_callable_type, add_value_to_literal, get_generic_type_argument_from_instance, + TypedDict, ) diff --git a/tests/common/test_validation.py b/tests/common/test_validation.py index 6899d8d5fe..62c805edff 100644 --- a/tests/common/test_validation.py +++ b/tests/common/test_validation.py @@ -10,7 +10,6 @@ Literal, Mapping, Sequence, - TypedDict, TypeVar, Optional, Union, @@ -24,7 +23,7 @@ TWriteDispositionConfig, ) from dlt.common.schema.utils import simple_regex_validator -from dlt.common.typing import DictStrStr, StrStr, TDataItem, TSortOrder, TColumnNames +from dlt.common.typing import DictStrStr, StrStr, TDataItem, TSortOrder, TColumnNames, TypedDict from dlt.common.validation import validate_dict, validate_dict_ignoring_xkeys diff --git a/tests/libs/pyarrow/test_pyarrow.py b/tests/libs/pyarrow/test_pyarrow.py index 07e8d3428d..bec4db2634 100644 --- a/tests/libs/pyarrow/test_pyarrow.py +++ b/tests/libs/pyarrow/test_pyarrow.py @@ -1,4 +1,4 @@ -from datetime import timezone, datetime, timedelta # noqa: I251 +from datetime import timezone, datetime, date, timedelta # noqa: I251 from copy import deepcopy from typing import List, Any @@ -109,25 +109,24 @@ def test_to_arrow_scalar() -> None: assert dt_converted == datetime(2021, 1, 1, 13, 2, 32, tzinfo=timezone.utc) -def test_from_arrow_scalar() -> None: +def test_arrow_type_coercion() -> None: + # coerce UTC python dt into naive arrow dt naive_dt = get_py_arrow_timestamp(6, tz=None) - sc_dt = to_arrow_scalar(datetime(2021, 1, 1, 5, 2, 32), naive_dt) - - # this value is like UTC - py_dt = from_arrow_scalar(sc_dt) - assert isinstance(py_dt, pendulum.DateTime) - # and we convert to explicit UTC - assert py_dt == datetime(2021, 1, 1, 5, 2, 32, tzinfo=timezone.utc) - - # converts to UTC - berlin_dt = get_py_arrow_timestamp(6, tz="Europe/Berlin") - sc_dt = to_arrow_scalar( - datetime(2021, 1, 1, 5, 2, 32, tzinfo=timezone(timedelta(hours=-8))), berlin_dt - ) + sc_dt = to_arrow_scalar(datetime(2021, 1, 1, 5, 2, 32, tzinfo=timezone.utc), naive_dt) + # does not convert to pendulum py_dt = from_arrow_scalar(sc_dt) - assert isinstance(py_dt, pendulum.DateTime) - assert py_dt.tzname() == "UTC" - assert py_dt == datetime(2021, 1, 1, 13, 2, 32, tzinfo=timezone.utc) + assert not isinstance(py_dt, pendulum.DateTime) + assert isinstance(py_dt, datetime) + assert py_dt.tzname() is None + + # coerce datetime into date + py_date = pa.date32() + sc_date = to_arrow_scalar(datetime(2021, 1, 1, 5, 2, 32, tzinfo=timezone.utc), py_date) + assert from_arrow_scalar(sc_date) == date(2021, 1, 1) + + py_date = pa.date64() + sc_date = to_arrow_scalar(datetime(2021, 1, 1, 5, 2, 32, tzinfo=timezone.utc), py_date) + assert from_arrow_scalar(sc_date) == date(2021, 1, 1) def _row_at_index(table: pa.Table, index: int) -> List[Any]: diff --git a/tests/libs/test_parquet_writer.py b/tests/libs/test_parquet_writer.py index b6a25c5db5..406b72c8c4 100644 --- a/tests/libs/test_parquet_writer.py +++ b/tests/libs/test_parquet_writer.py @@ -12,7 +12,6 @@ from dlt.common.schema.utils import new_column from dlt.common.configuration.specs.config_section_context import ConfigSectionContext from dlt.common.time import ensure_pendulum_datetime -from dlt.common.libs.pyarrow import from_arrow_scalar from tests.common.data_writers.utils import get_writer from tests.cases import ( diff --git a/tests/libs/test_pydantic.py b/tests/libs/test_pydantic.py index 70846dcd72..b3bb024620 100644 --- a/tests/libs/test_pydantic.py +++ b/tests/libs/test_pydantic.py @@ -24,6 +24,7 @@ from datetime import datetime, date, time # noqa: I251 from dlt.common import Decimal from dlt.common import json +from dlt.common.schema.typing import TColumnType from dlt.common.libs.pydantic import ( DltConfig, @@ -724,3 +725,21 @@ class MyParent(Parent): assert "data_dictionary" not in schema assert "data_list" not in schema + + +def test_typed_dict_by_python_version(): + """when using typeddict in pydantic, it should be imported + from typing_extensions in python 3.11 and earlier and typing + in python 3.12 and later. + Here we test that this is properly set up in dlt. + """ + + class MyModel(BaseModel): + # TColumnType inherits from TypedDict + column_type: TColumnType + + m = MyModel(column_type={"data_type": "text"}) + assert m.column_type == {"data_type": "text"} + + with pytest.raises(ValidationError): + m = MyModel(column_type={"data_type": "invalid_type"}) # type: ignore[typeddict-item] diff --git a/tests/load/sources/sql_database/sql_source.py b/tests/load/sources/sql_database/sql_source.py index 3f8b89a2b5..6667cb1070 100644 --- a/tests/load/sources/sql_database/sql_source.py +++ b/tests/load/sources/sql_database/sql_source.py @@ -1,6 +1,6 @@ import random from copy import deepcopy -from typing import Dict, List, TypedDict +from typing import Dict, List from uuid import uuid4 import mimesis @@ -44,6 +44,7 @@ from dlt.common.pendulum import pendulum, timedelta from dlt.common.utils import chunks, uniq_id from dlt.sources.credentials import ConnectionStringCredentials +from dlt.common.typing import TypedDict class SQLAlchemySourceDB: diff --git a/tox.ini b/tox.ini index 059f6a586a..992591fe1c 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,8 @@ banned-modules = datetime = use dlt.common.pendulum decimal.Decimal = use dlt.common.Decimal open = use dlt.common.open pendulum = use dlt.common.pendulum + typing.TypedDict = use dlt.common.typing.TypedDict + typing-extensions.TypedDict = use dlt.common.typing.TypedDict extend-immutable-calls = dlt.sources.incremental per-file-ignores = tests/*: T20