From 340288193f57118c7b14bf70f7657cc5ddfdb7f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Thu, 1 Feb 2024 23:26:24 -0600 Subject: [PATCH] feat: Implement the `organizations` stream (#296) --- README.md | 3 +- tap_readthedocs/__main__.py | 7 ++++ tap_readthedocs/client.py | 44 ++------------------ tap_readthedocs/streams.py | 83 ++++++++++++++++++++++++++++++++++--- tap_readthedocs/tap.py | 18 +++++++- tests/test_core.py | 32 -------------- 6 files changed, 106 insertions(+), 81 deletions(-) create mode 100644 tap_readthedocs/__main__.py diff --git a/README.md b/README.md index 3ead32c..b578f1d 100644 --- a/README.md +++ b/README.md @@ -37,13 +37,14 @@ Singer Tap for [**Read the Docs**](https://docs.readthedocs.io). Built with the | Setting| Required | Default | Description | |:-------|:--------:|:-------:|:------------| | token | True | None | | +| include_business_streams | False | False | Whether to include streams available only to ReadTheDocs for Business accounts | A full list of supported settings and capabilities is available by running: `tap-readthedocs --about` ## Installation ```bash -pipx install git+https://github.com/edgarrmondragon/tap-readthedocs.git +pipx install tap-readthedocs ``` ### Source Authentication and Authorization diff --git a/tap_readthedocs/__main__.py b/tap_readthedocs/__main__.py new file mode 100644 index 0000000..8a0f4f1 --- /dev/null +++ b/tap_readthedocs/__main__.py @@ -0,0 +1,7 @@ +"""Entrypoint module for tap-readthedocs.""" + +from __future__ import annotations + +from tap_readthedocs.tap import TapReadTheDocs + +TapReadTheDocs.cli() diff --git a/tap_readthedocs/client.py b/tap_readthedocs/client.py index c2f830a..a5102ce 100644 --- a/tap_readthedocs/client.py +++ b/tap_readthedocs/client.py @@ -8,8 +8,7 @@ import requests_cache from singer_sdk.authenticators import APIKeyAuthenticator from singer_sdk.exceptions import RetriableAPIError -from singer_sdk.helpers.jsonpath import extract_jsonpath -from singer_sdk.pagination import BaseOffsetPaginator, first +from singer_sdk.pagination import BaseOffsetPaginator from singer_sdk.streams import RESTStream if t.TYPE_CHECKING: @@ -19,42 +18,6 @@ TStream = t.TypeVar("TStream", bound=RESTStream[int]) -class ReadTheDocsPaginator(BaseOffsetPaginator): - """Paginator that stops when a page with 0 items is returned.""" - - def __init__(self, start_value: int, page_size: int, records_jsonpath: str) -> None: - """Create a new paginator. - - Args: - start_value: Initial value. - page_size: Number of items per page. - records_jsonpath: A JSONPath expression. - """ - super().__init__(start_value, page_size) - self._records_jsonpath = records_jsonpath - - def has_more(self, response: requests.Response) -> bool: - """Check if response has any items. - - Args: - response: API response object. - - Returns: - True if response contains at least one item. - """ - try: - first( - extract_jsonpath( - self._records_jsonpath, - response.json(), - ), - ) - except StopIteration: - return False - - return True - - class ReadTheDocsStream(RESTStream[int]): """ReadTheDocs stream class.""" @@ -118,14 +81,13 @@ def get_url_params( "expand": "config", } - def get_new_paginator(self) -> ReadTheDocsPaginator: + def get_new_paginator(self) -> BaseOffsetPaginator: """Get a fresh paginator for this API endpoint. Returns: A paginator instance. """ - return ReadTheDocsPaginator( + return BaseOffsetPaginator( start_value=0, page_size=self.page_size, - records_jsonpath=self.records_jsonpath, ) diff --git a/tap_readthedocs/streams.py b/tap_readthedocs/streams.py index 032d05c..c9721ea 100644 --- a/tap_readthedocs/streams.py +++ b/tap_readthedocs/streams.py @@ -17,7 +17,7 @@ class Projects(ReadTheDocsStream): name = "projects" path = "/api/v3/projects/" - primary_keys: t.ClassVar[list[str]] = ["id"] + primary_keys = ("id",) schema = th.PropertiesList( th.Property("id", th.IntegerType), @@ -86,7 +86,7 @@ class Versions(ReadTheDocsStream): name = "versions" path = "/api/v3/projects/{project_slug}/versions" - primary_keys: t.ClassVar[list[str]] = ["id"] + primary_keys = ("id",) parent_stream_type = Projects schema = th.PropertiesList( @@ -111,7 +111,7 @@ class Builds(ReadTheDocsStream): name = "builds" path = "/api/v3/projects/{project_slug}/builds" - primary_keys: t.ClassVar[list[str]] = ["id"] + primary_keys = ("id",) parent_stream_type = Projects schema = th.PropertiesList( @@ -189,7 +189,7 @@ class Subprojects(ReadTheDocsStream): name = "subprojects" path = "/api/v3/projects/{project_slug}/subprojects" - primary_keys: t.ClassVar[list[str]] = ["id"] + primary_keys = ("id",) parent_stream_type = Projects # TODO(edgarrmondragon): get the complete schema @@ -204,7 +204,7 @@ class Translations(ReadTheDocsStream): name = "translations" path = "/api/v3/projects/{project_slug}/translations" - primary_keys: t.ClassVar[list[str]] = ["id"] + primary_keys = ("id",) parent_stream_type = Projects # TODO(edgarrmondragon): get the complete schema @@ -219,11 +219,82 @@ class Redirects(ReadTheDocsStream): name = "redirects" path = "/api/v3/projects/{project_slug}/redirects" - primary_keys: t.ClassVar[list[str]] = ["id"] + primary_keys = ("id",) parent_stream_type = Projects # TODO(edgarrmondragon): get the complete schema # https://github.com/edgarrmondragon/tap-readthedocs/issues/2 schema = th.PropertiesList( th.Property("id", th.StringType), + # TODO(edgarrmondragon): Inform max length of 255 + # https://github.com/edgarrmondragon/tap-readthedocs/issues/295 + th.Property("redirect_type", th.StringType), + # TODO(edgarrmondragon): Inform max length of 255 + # https://github.com/edgarrmondragon/tap-readthedocs/issues/295 + th.Property( + "from_url", + th.StringType, + description="Absolute path, excluding the domain", + examples=["/docs/", "/install.html"], + ), + # TODO(edgarrmondragon): Inform max length of 255 + # https://github.com/edgarrmondragon/tap-readthedocs/issues/295 + th.Property( + "to_url", + th.StringType, + description="Absolute or relative URL", + examples=["/tutorial/install.html"], + ), + th.Property( + "force", + th.BooleanType, + description="Apply the redirect even if the page exists", + ), + # TODO(edgarrmondragon): Inform "small" integer + # https://github.com/edgarrmondragon/tap-readthedocs/issues/295 + th.Property( + "http_status", + th.IntegerType, + description="HTTP status code for the redirect", + ), + th.Property("enabled", th.BooleanType), + # TODO(edgarrmondragon): Inform max length of 255 + # https://github.com/edgarrmondragon/tap-readthedocs/issues/295 + th.Property("description", th.StringType), + # TODO(edgarrmondragon): Inform positive integer + # https://github.com/edgarrmondragon/tap-readthedocs/issues/295 + th.Property( + "position", + th.IntegerType, + description="Order of execution of the redirect", + ), + th.Property("create_dt", th.DateTimeType), + th.Property("update_dt", th.DateTimeType), + ).to_dict() + + +class Organizations(ReadTheDocsStream): + """Organizations stream.""" + + name = "organizations" + path = "/api/v3/organizations/" + primary_keys = ("slug",) + + schema = th.PropertiesList( + th.Property("slug", th.StringType), + th.Property("name", th.StringType), + th.Property("url", th.StringType), + th.Property("email", th.StringType), + th.Property("description", th.StringType), + th.Property("created", th.DateTimeType), + th.Property("modified", th.DateTimeType), + th.Property("disabled", th.BooleanType), + th.Property( + "owners", + th.ArrayType( + th.ObjectType( + th.Property("username", th.StringType), + ), + ), + ), ).to_dict() diff --git a/tap_readthedocs/tap.py b/tap_readthedocs/tap.py index ab10637..a7ed203 100644 --- a/tap_readthedocs/tap.py +++ b/tap_readthedocs/tap.py @@ -14,6 +14,15 @@ class TapReadTheDocs(Tap): config_jsonschema = th.PropertiesList( th.Property("token", th.StringType, required=True), + th.Property( + "include_business_streams", + th.BooleanType, + description=( + "Whether to include streams available only to ReadTheDocs for Business " + "accounts." + ), + default=False, + ), ).to_dict() def discover_streams(self) -> list[Stream]: @@ -22,7 +31,7 @@ def discover_streams(self) -> list[Stream]: Returns: A list of ReadTheDocs streams. """ - return [ + result = [ streams.Builds(tap=self), streams.Projects(tap=self), streams.Redirects(tap=self), @@ -30,3 +39,10 @@ def discover_streams(self) -> list[Stream]: streams.Translations(tap=self), streams.Versions(tap=self), ] + + if self.config.get("include_business_streams", False): + result.extend( + [streams.Organizations(tap=self)], + ) + + return result diff --git a/tests/test_core.py b/tests/test_core.py index 9f482c8..d31d9dd 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -4,10 +4,8 @@ from typing import Any -from requests import Response from singer_sdk.testing import SuiteConfig, get_tap_test_class -from tap_readthedocs.client import ReadTheDocsPaginator, ReadTheDocsStream from tap_readthedocs.tap import TapReadTheDocs SAMPLE_CONFIG: dict[str, Any] = {} @@ -27,33 +25,3 @@ ], ), ) - - -def test_paginator() -> None: - """Validate paginator that uses the page offset.""" - response = Response() - paginator = ReadTheDocsPaginator( - start_value=0, - page_size=2, - records_jsonpath=ReadTheDocsStream.records_jsonpath, - ) - - assert not paginator.finished - assert paginator.current_value == 0 - - response._content = b'{"results": [{}, {}]}' - paginator.advance(response) - assert not paginator.finished - assert paginator.current_value == 2 # noqa: PLR2004 - assert paginator.count == 1 - - response._content = b'{"results": [{}, {}]}' - paginator.advance(response) - assert not paginator.finished - assert paginator.current_value == 4 # noqa: PLR2004 - assert paginator.count == 2 # noqa: PLR2004 - - response._content = b'{"results": []}' - paginator.advance(response) - assert paginator.finished - assert paginator.count == 3 # noqa: PLR2004