[Concurrent Low-Code] ConcurrentDeclarativeSource class that low-code connectors can inherit from to uptake Concurrent CDK #46662

Open · brianjlai wants to merge 33 commits into master from brian/concurrent_declarative_source
Commits:
- 1130a74: initial work to create the ConcurrentDeclarativeSource that can run i… (brianjlai, Sep 23, 2024)
- 54a1f85: Merge branch 'master' into brian/concurrent_declarative_source (brianjlai, Sep 26, 2024)
- 0bd2deb: adding more tests and fixing bugs for only syncing streams in catalog… (brianjlai, Oct 1, 2024)
- 2ad65d9: Merge branch 'master' into brian/concurrent_declarative_source (brianjlai, Oct 2, 2024)
- 8861054: fix a few more merge conflict errors (brianjlai, Oct 2, 2024)
- aa55ab7: Fix tests and add cursor granularity to the cursor partition generator (brianjlai, Oct 3, 2024)
- 4288b8d: integrate YamlDeclarativeSource with ConcurrentDeclarativeSource and … (brianjlai, Oct 9, 2024)
- 1650e30: Merge branch 'master' into brian/concurrent_declarative_source (brianjlai, Oct 9, 2024)
- 98c42a7: rebase, formatting, fix tests, add new test cases for concurrency level (brianjlai, Oct 9, 2024)
- 0f79069: forgot to remove change to test (brianjlai, Oct 9, 2024)
- 5992e19: fix mypy errors and a few others bugs and testing (brianjlai, Oct 12, 2024)
- 6a160c0: Merge branch 'master' into brian/concurrent_declarative_source (brianjlai, Oct 12, 2024)
- b09311f: add logic to skip streams using non-thread safe stream_state, pr feed… (brianjlai, Oct 16, 2024)
- 0180e11: fix formatting and mypy checks (brianjlai, Oct 16, 2024)
- 0c0c019: Merge branch 'master' into brian/concurrent_declarative_source (brianjlai, Oct 16, 2024)
- 61dfb9b: fix more mypy (brianjlai, Oct 16, 2024)
- 5dd1121: mypy (brianjlai, Oct 16, 2024)
- 7bd3056: pr feedback and updates to source-sentry for testing (brianjlai, Oct 17, 2024)
- bef1c03: sentry lockfile (brianjlai, Oct 17, 2024)
- c2e3bdb: update base image (brianjlai, Oct 17, 2024)
- 9116da6: bump amplitude dependencies and versions for testing (brianjlai, Oct 17, 2024)
- 80186ca: Merge branch 'master' into brian/concurrent_declarative_source (brianjlai, Oct 17, 2024)
- 1242296: add logging for incremental streams that are not thread safe (brianjlai, Oct 17, 2024)
- b74b942: Merge branch 'master' into brian/concurrent_declarative_source (brianjlai, Oct 17, 2024)
- 99bee1e: remove amplitude version bump (brianjlai, Oct 17, 2024)
- 15cb6dd: get rid of stream_state interpolation in sentry (brianjlai, Oct 18, 2024)
- 9b5eb62: whatever (brianjlai, Oct 19, 2024)
- b82c21c: parse DatetimeBasedCursorModel to ConcurrentCursor, bugfixes, pr feed… (brianjlai, Oct 19, 2024)
- 6a91848: formatting + mypy (brianjlai, Oct 19, 2024)
- 15127a7: fix mypy by replacing empty tuple() with None to make it truly optional (brianjlai, Oct 19, 2024)
- 977c525: remove local cdk from sentry (brianjlai, Oct 19, 2024)
- 4e38c4e: update lockfile (brianjlai, Oct 19, 2024)
- f7f3e9d: swapped updating lockfiles (brianjlai, Oct 19, 2024)
@@ -28,7 +28,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._parameters = parameters
 
     def check_connection(self, source: Source, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
-        streams = source.streams(config)  # type: ignore # source is always a DeclarativeSource, but this parameter type adheres to the outer interface
+        streams = source.all_streams(config)  # type: ignore # source is always a DeclarativeSource, but this parameter type adheres to the outer interface
Contributor: Teach me your ways: what is the difference between streams and all_streams? (Ignore if it's answered in this PR; still reading through it.)

Author (brianjlai): Good question, I mention it in a comment: https://github.com/airbytehq/airbyte/pull/46662/files#r1792816384

The reason we can't just rewrite the streams() method is that within the existing Python CDK core.py, when processing synchronous streams, we invoke the streams() method, and in that context we don't want to return the concurrent streams that aren't compatible in that area of code.

Collaborator: As we discussed earlier, it's preferable to use the streams() method and condition the behavior accordingly. This approach adds some complexity, but the tradeoff is that it allows simpler modifications later: once the core is able to handle concurrent streams, we'll get a stream generation interface for free.

Author (brianjlai): Yep, this is addressed in my latest commit using the optional param include_concurrent_streams.
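A rough sketch of that optional-parameter approach (the parameter name comes from the comment above; the class skeleton and helper methods are hypothetical, not the PR's exact code):

```python
from typing import Any, List, Mapping

from airbyte_cdk.sources.streams import Stream


class ConcurrentDeclarativeSource:  # simplified skeleton for illustration
    def streams(self, config: Mapping[str, Any], include_concurrent_streams: bool = False) -> List[Stream]:
        # The synchronous read path in core.py keeps calling streams() with the
        # default, so it never sees streams that only run on the concurrent framework.
        synchronous = self._build_synchronous_streams(config)  # hypothetical helper
        if not include_concurrent_streams:
            return synchronous
        # Callers that understand concurrency opt in explicitly.
        return synchronous + self._build_concurrent_streams(config)  # hypothetical helper
```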

         stream_name_to_stream = {s.name: s for s in streams}
         if len(streams) == 0:
             return False, f"No streams to connect to from source {source}"

[Large diff not rendered by default.]

@@ -4,10 +4,11 @@
 
 import logging
 from abc import abstractmethod
-from typing import Any, Mapping, Tuple
+from typing import Any, List, Mapping, Tuple
 
 from airbyte_cdk.sources.abstract_source import AbstractSource
 from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
+from airbyte_cdk.sources.streams import Stream
 
 
 class DeclarativeSource(AbstractSource):
@@ -32,3 +33,6 @@ def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) ->
         The error object will be cast to string to display the problem to the user.
         """
         return self.connection_checker.check_connection(self, logger, config)
+
+    def all_streams(self, config: Mapping[str, Any]) -> List[Stream]:
+        return self.streams(config=config)
Author (brianjlai): For non-concurrent low-code sources, these are equivalent, but we override this implementation in ConcurrentDeclarativeSource to create a single list of both concurrent and synchronous streams so that we properly generate catalogs and other things.
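A minimal sketch of what that override could look like (the attribute names are assumptions for illustration; the PR's actual partitioning of streams may differ):

```python
def all_streams(self, config: Mapping[str, Any]) -> List[Stream]:
    # Catalog generation (and anything else that needs the full set) gets
    # every stream, regardless of which framework will execute it.
    return self._concurrent_streams + self._synchronous_streams  # hypothetical attributes
```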

@@ -378,3 +378,18 @@ def set_runtime_lookback_window(self, lookback_window_in_seconds: int) -> None:
         # Check if the new runtime lookback window is greater than the current config lookback
         if parse_duration(runtime_lookback_window) > config_lookback:
             self._lookback_window = InterpolatedString.create(runtime_lookback_window, parameters={})
+
+    def get_start_datetime(self) -> MinMaxDatetime:
+        return self._start_datetime
+
+    def get_end_datetime(self) -> Optional[MinMaxDatetime]:
+        return self._end_datetime
+
+    def get_step(self) -> Union[timedelta, Duration]:
+        return self._step
+
+    def get_partition_field_start(self) -> InterpolatedString:
+        return self._partition_field_start
+
+    def get_partition_field_end(self) -> InterpolatedString:
+        return self._partition_field_end
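These getters expose the declarative cursor's configuration so the concurrent machinery can translate a DatetimeBasedCursor into its concurrent equivalent. A hedged sketch of that kind of consumer (everything except the getters themselves is illustrative):

```python
def slice_boundary_fields(cursor, config):
    # Evaluate the interpolated partition-field names so a concurrent
    # partition generator can label slice boundaries consistently.
    return (
        cursor.get_partition_field_start().eval(config=config),
        cursor.get_partition_field_end().eval(config=config),
    )
```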
@@ -3,23 +3,37 @@
 #
 
 import pkgutil
-from typing import Any
+from typing import Any, List, Mapping, Optional
 
 import yaml
-from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
+from airbyte_cdk.models import AirbyteStateMessage, ConfiguredAirbyteCatalog
+from airbyte_cdk.sources.declarative.concurrent_declarative_source import ConcurrentDeclarativeSource
 from airbyte_cdk.sources.types import ConnectionDefinition
 
 
-class YamlDeclarativeSource(ManifestDeclarativeSource):
+class YamlDeclarativeSource(ConcurrentDeclarativeSource[List[AirbyteStateMessage]]):
     """Declarative source defined by a yaml file"""
 
-    def __init__(self, path_to_yaml: str, debug: bool = False) -> None:
+    def __init__(
+        self,
+        path_to_yaml: str,
+        debug: bool = False,
+        catalog: Optional[ConfiguredAirbyteCatalog] = None,
+        config: Optional[Mapping[str, Any]] = None,
+        state: Optional[List[AirbyteStateMessage]] = None,
+    ) -> None:
         """
         :param path_to_yaml: Path to the yaml file describing the source
         """
         self._path_to_yaml = path_to_yaml
         source_config = self._read_and_parse_yaml_file(path_to_yaml)
-        super().__init__(source_config, debug)
+
+        super().__init__(
+            catalog=catalog or ConfiguredAirbyteCatalog(streams=[]),
+            config=config or {},
+            state=state or [],
+            source_config=source_config,
+        )
 
     def _read_and_parse_yaml_file(self, path_to_yaml_file: str) -> ConnectionDefinition:
         package = self.__class__.__module__.split(".")[0]
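With this change, an entrypoint can hand the parsed catalog, config, and state straight to the source, while older call sites keep working via the defaults. A usage sketch (the variable names and file path are illustrative):

```python
source = YamlDeclarativeSource(
    path_to_yaml="manifest.yaml",  # illustrative path
    catalog=configured_catalog,    # parsed from the --catalog CLI argument
    config=config,                 # parsed from --config
    state=state,                   # parsed from --state
)
```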
@@ -5,6 +5,7 @@
 import copy
 import json
 import logging
+from datetime import datetime
 from functools import lru_cache
 from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
 
@@ -24,6 +25,7 @@
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
 from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
+from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import DateTimeStreamStateConverter
 from airbyte_cdk.sources.streams.core import StreamData
 from airbyte_cdk.sources.types import StreamSlice
 from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
@@ -203,6 +205,11 @@ class SliceEncoder(json.JSONEncoder):
     def default(self, obj: Any) -> Any:
         if hasattr(obj, "__json_serializable__"):
             return obj.__json_serializable__()
+
+        # This needs to be revisited as we can't lose precision
+        if isinstance(obj, datetime):
+            return list(obj.timetuple())[0:6]
Collaborator: Should we set 6 as a variable to avoid using magic numbers?

Author (brianjlai): Yes, thank you for calling this out. I had put this in as a placeholder as I was working through getting this tested the first time around, and it needs to be reinvestigated/fixed.

Contributor: I don't follow this -- is there somewhere we would be serializing a datetime object?

Author (brianjlai): I'll double check, but I think we needed this serializer deeper in our code, potentially in how we emit state back out. I'll reconfirm this as I work through @lazebnyi's comment above.

Contributor: I'm also curious about this. From my understanding, this would mean that CursorPartitionGenerator would create slices with datetimes in them, but that is not what I see.

Author (brianjlai, Oct 16, 2024): @lazebnyi @pnilan @maxi297, to close the loop on this one: I think what originally happened was that I wrote this in when I was first testing, because we were getting the datetime object from the ConcurrentCursor and it would fail trying to serialize it.

However, after cleaning up the code and fixing edge cases, I addressed the serialization by converting the datetime into the correct output string format, with the correct precision, in the generate() function here: https://github.com/airbytehq/airbyte/pull/46662/files#diff-93127bface0b323fe43b21cdb8fb14493dd465995b085a4f81647f3697930bddR396-R399. And since this is now already a string, we don't need to convert it again.

I'll get rid of this code as it's not actually used anymore, and we're applying the correct precision based on the cursor definition.
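For reference, the precision issue flagged in the placeholder comment is easy to reproduce: timetuple() only carries whole seconds, so any sub-second component is silently dropped.

```python
from datetime import datetime, timezone

dt = datetime(2024, 10, 16, 12, 30, 45, 123456, tzinfo=timezone.utc)
print(list(dt.timetuple())[0:6])  # [2024, 10, 16, 12, 30, 45] -- the 123456 microseconds are lost
```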


         # Let the base class default method raise the TypeError
         return super().default(obj)

@@ -341,12 +348,17 @@ class CursorPartitionGenerator(PartitionGenerator):
     across partitions. Each partition represents a subset of the stream's data and is determined by the cursor's state.
     """
 
+    _START_BOUNDARY = 0
+    _END_BOUNDARY = 1
+
     def __init__(
         self,
         stream: Stream,
         message_repository: MessageRepository,
         cursor: Cursor,
+        connector_state_converter: DateTimeStreamStateConverter,
         cursor_field: Optional[List[str]],
+        slice_boundary_fields: Optional[Tuple[str, str]],
     ):
         """
         Initialize the CursorPartitionGenerator with a stream, sync mode, and cursor.
@@ -362,6 +374,8 @@ def __init__(
         self._cursor = cursor
         self._cursor_field = cursor_field
         self._state = self._cursor.state
+        self._slice_boundary_fields = slice_boundary_fields
+        self._connector_state_converter = connector_state_converter
 
     def generate(self) -> Iterable[Partition]:
         """
@@ -372,8 +386,19 @@
 
         :return: An iterable of StreamPartition objects.
         """
-        for slice_start, slice_end in self._cursor.generate_slices():
-            stream_slice = StreamSlice(partition={}, cursor_slice={"start": slice_start, "end": slice_end})
+
+        start_boundary = self._slice_boundary_fields[self._START_BOUNDARY] if self._slice_boundary_fields else "start"
Collaborator: For the current implementation of the datetime cursor, self._slice_boundary_fields never has a None value.
+        end_boundary = self._slice_boundary_fields[self._END_BOUNDARY] if self._slice_boundary_fields else "end"
+
+        wam = list(self._cursor.generate_slices())
+        for slice_start, slice_end in wam:
Contributor: nit: It seems like it would be more memory-efficient to iterate directly over the generated slices; is there a specific reason for saving them to a list?

Author (brianjlai): Correct. I had originally added this for debugging, to see the entire set of slices more easily, but you are right that this should stay an iterable.

+            stream_slice = StreamSlice(
+                partition={},
+                cursor_slice={
+                    start_boundary: self._connector_state_converter.output_format(slice_start),
+                    end_boundary: self._connector_state_converter.output_format(slice_end),
+                },
+            )
 
             yield StreamPartition(
                 self._stream,
@@ -386,7 +411,7 @@
             )
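The fix the author agrees to above is a one-line change; generate() can consume the cursor's slices lazily instead of materializing them first (sketch):

```python
# Iterate lazily; no intermediate list is built.
for slice_start, slice_end in self._cursor.generate_slices():
    ...  # build the StreamSlice and yield the StreamPartition exactly as above
```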


-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated("Availability strategy has been soft deprecated. Do not use. Class is subject to removal", category=ExperimentalClassWarning)
 class AvailabilityStrategyFacade(AvailabilityStrategy):
     def __init__(self, abstract_availability_strategy: AbstractAvailabilityStrategy):
         self._abstract_availability_strategy = abstract_availability_strategy
@@ -67,6 +67,7 @@ def check_availability(self, logger: logging.Logger) -> StreamAvailability:
     """
 
 
+@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
Contributor: @brianjlai probably better to call out that it should not be used at all, if we're ripping out availability strategies over the mid-to-long term?

Author (brianjlai): Ah yeah, I think I carried that over from a merge of serhii's work, which deprecated the availability strategy in concurrent. I'll update this to say "do not use".

 class AlwaysAvailableAvailabilityStrategy(AbstractAvailabilityStrategy):
     """
     An availability strategy that always indicates a stream is available.
@@ -321,7 +321,10 @@ def _split_per_slice_range(self, lower: CursorValueType, upper: CursorValueType)
 
         lower = max(lower, self._start) if self._start else lower
         if not self._slice_range or lower + self._slice_range >= upper:
-            yield lower, upper
+            if self._cursor_granularity:
+                yield lower, upper - self._cursor_granularity
+            else:
+                yield lower, upper
         else:
             stop_processing = False
             current_lower_boundary = lower
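To make the granularity adjustment concrete: subtracting the cursor granularity from the upper bound keeps adjacent inclusive slices from overlapping on the boundary value. A standalone worked example (plain datetimes, not the CDK classes):

```python
from datetime import datetime, timedelta

lower = datetime(2024, 1, 1)
upper = datetime(2024, 1, 10)
granularity = timedelta(days=1)

# Without the adjustment, this slice would end at 2024-01-10 and the next
# slice would start there too, so a record stamped exactly 2024-01-10
# could be read twice.
print(lower, upper - granularity)  # 2024-01-01 00:00:00 2024-01-09 00:00:00
```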
@@ -157,3 +157,17 @@ def parse_timestamp(self, timestamp: str) -> datetime:
         if not isinstance(dt_object, DateTime):
             raise ValueError(f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})")
         return dt_object  # type: ignore # we are manually type checking because pendulum.parse may return different types
+
+
+class CustomOutputFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateConverter):
+    """
+    Datetime State converter that emits state according to the supplied datetime format. The converter supports reading
+    incoming state in any valid datetime format via Pendulum.
+    """
+
+    def __init__(self, datetime_format: str, is_sequential_state: bool = True, cursor_granularity: Optional[timedelta] = None):
+        super().__init__(is_sequential_state=is_sequential_state, cursor_granularity=cursor_granularity)
+        self._datetime_format = datetime_format
+
+    def output_format(self, timestamp: datetime) -> str:
+        return timestamp.strftime(self._datetime_format)
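A quick usage sketch of the new converter (the format string is illustrative):

```python
from datetime import datetime, timedelta

converter = CustomOutputFormatConcurrentStreamStateConverter(
    datetime_format="%Y-%m-%dT%H:%M:%SZ",  # illustrative format
    cursor_granularity=timedelta(seconds=1),
)
print(converter.output_format(datetime(2024, 10, 16, 12, 30, 45)))  # 2024-10-16T12:30:45Z
```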
@@ -40,7 +40,7 @@ def test_check_stream_with_slices_as_list(test_name, record, streams_to_check, s
     stream.read_records.side_effect = mock_read_records({frozenset(stream_slice): iter([record])})
 
     source = MagicMock()
-    source.streams.return_value = [stream]
+    source.all_streams.return_value = [stream]
 
     check_stream = CheckStream(streams_to_check, parameters={})
 
@@ -63,7 +63,7 @@ def test_check_empty_stream():
     stream.stream_slices.return_value = iter([None])
 
     source = MagicMock()
-    source.streams.return_value = [stream]
+    source.all_streams.return_value = [stream]
 
     check_stream = CheckStream(["s1"], parameters={})
     stream_is_available, reason = check_stream.check_connection(source, logger, config)
@@ -76,7 +76,7 @@ def test_check_stream_with_no_stream_slices_aborts():
     stream.stream_slices.return_value = iter([])
 
     source = MagicMock()
-    source.streams.return_value = [stream]
+    source.all_streams.return_value = [stream]
 
     check_stream = CheckStream(["s1"], parameters={})
     stream_is_available, reason = check_stream.check_connection(source, logger, config)
@@ -123,7 +123,7 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp
     assert isinstance(http_stream, HttpStream)
 
     source = MagicMock()
-    source.streams.return_value = [http_stream]
+    source.all_streams.return_value = [http_stream]
 
     check_stream = CheckStream(stream_names=["mock_http_stream"], parameters={})