From 95b5642e5c3c097fdf5adc0725573393bd9dc93c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= Date: Thu, 4 Aug 2022 00:45:32 -0500 Subject: [PATCH] fix: Use importlib.resources --- ...kiecutter.stream_type %}client.py{%endif%} | 3 - ...iecutter.stream_type %}streams.py{%endif%} | 4 +- docs/code_samples.md | 6 + poetry.lock | 71 +++++--- pyproject.toml | 1 + samples/__init__.py | 0 samples/aapl/__init__.py | 0 samples/aapl/aapl.py | 5 +- .../sample_tap_countries/countries_streams.py | 7 +- .../sample_tap_countries/schemas/__init__.py | 0 .../gitlab_graphql_streams.py | 6 +- .../sample_tap_gitlab/gitlab_rest_streams.py | 5 +- samples/sample_tap_gitlab/schemas/__init__.py | 0 .../ga_tap_stream.py | 5 +- .../schemas/__init__.py | 0 singer_sdk/helpers/_util.py | 20 +++ singer_sdk/streams/core.py | 153 +++++++++--------- 17 files changed, 166 insertions(+), 120 deletions(-) create mode 100644 samples/__init__.py create mode 100644 samples/aapl/__init__.py create mode 100644 samples/sample_tap_countries/schemas/__init__.py create mode 100644 samples/sample_tap_gitlab/schemas/__init__.py create mode 100644 samples/sample_tap_google_analytics/schemas/__init__.py diff --git a/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/{%if 'REST' == cookiecutter.stream_type %}client.py{%endif%} b/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/{%if 'REST' == cookiecutter.stream_type %}client.py{%endif%} index b6a3937491..4d7b508f15 100644 --- a/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/{%if 'REST' == cookiecutter.stream_type %}client.py{%endif%} +++ b/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/{%if 'REST' == cookiecutter.stream_type %}client.py{%endif%} @@ -20,9 +20,6 @@ from {{ cookiecutter.library_name }}.auth import {{ cookiecutter.source_name }}A {%- endif %} -SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") - - class {{ cookiecutter.source_name }}Stream({{ cookiecutter.stream_type }}Stream): """{{ cookiecutter.source_name }} stream class.""" diff --git a/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/{%if 'SQL' != cookiecutter.stream_type %}streams.py{%endif%} b/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/{%if 'SQL' != cookiecutter.stream_type %}streams.py{%endif%} index b6bb35daa9..de34d3e4f2 100644 --- a/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/{%if 'SQL' != cookiecutter.stream_type %}streams.py{%endif%} +++ b/cookiecutter/tap-template/{{cookiecutter.tap_id}}/{{cookiecutter.library_name}}/{%if 'SQL' != cookiecutter.stream_type %}streams.py{%endif%} @@ -4,11 +4,13 @@ from pathlib import Path from typing import Any, Dict, Optional, Union, List, Iterable from singer_sdk import typing as th # JSON Schema typing helpers +from singer_sdk.helpers._util import get_package_files +from {{ cookiecutter.library_name }} import schemas from {{ cookiecutter.library_name }}.client import {{ cookiecutter.source_name }}Stream # TODO: Delete this is if not using json files for schema definition -SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") +SCHEMAS_DIR = get_package_files(schemas) {%- if cookiecutter.stream_type == "GraphQL" %} diff --git a/docs/code_samples.md b/docs/code_samples.md index 7338e77b6d..c2a48e25d2 100644 --- a/docs/code_samples.md +++ b/docs/code_samples.md @@ -54,6 +54,12 @@ class TapCountries(Tap): ### Define a simple GraphQL-based stream with schema defined in a file ```python +from singer_sdk.helpers._util import get_package_files +from tap_countries import schemas + +SCHEMAS_DIR = get_package_files(schemas) + + class ContinentsStream(GraphQLStream): """Continents stream from the Countries API.""" diff --git a/poetry.lock b/poetry.lock index 874083ac77..1c9aa1a327 100644 --- a/poetry.lock +++ b/poetry.lock @@ -327,6 +327,21 @@ docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"] perf = ["ipython"] testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"] +[[package]] +name = "importlib-resources" +version = "5.9.0" +description = "Read resources from Python packages" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"] +testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] + [[package]] name = "inflection" version = "0.5.1" @@ -456,9 +471,9 @@ python-versions = "~=3.6" markdown-it-py = ">=1.0.0,<3.0.0" [package.extras] -testing = ["pytest-regressions", "pytest-cov", "pytest (>=3.6,<4)", "coverage"] -rtd = ["sphinx-book-theme (>=0.1.0,<0.2.0)", "myst-parser (>=0.14.0,<0.15.0)"] code_style = ["pre-commit (==2.6)"] +rtd = ["myst-parser (>=0.14.0,<0.15.0)", "sphinx-book-theme (>=0.1.0,<0.2.0)"] +testing = ["coverage", "pytest (>=3.6,<4)", "pytest-cov", "pytest-regressions"] [[package]] name = "memoization" @@ -582,7 +597,7 @@ pytz = "<2021.0" simplejson = "3.11.1" [package.extras] -dev = ["nose", "ipdb", "ipython", "pylint"] +dev = ["pylint", "ipython", "ipdb", "nose"] [[package]] name = "platformdirs" @@ -593,8 +608,8 @@ optional = false python-versions = ">=3.6" [package.extras] -test = ["pytest-mock (>=3.6)", "pytest-cov (>=2.7)", "pytest (>=6)", "appdirs (==1.4.4)"] -docs = ["sphinx-autodoc-typehints (>=1.12)", "proselint (>=0.10.2)", "furo (>=2021.7.5b38)", "Sphinx (>=4)"] +docs = ["Sphinx (>=4)", "furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)"] +test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)"] [[package]] name = "pluggy" @@ -608,8 +623,8 @@ python-versions = ">=3.6" importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} [package.extras] -testing = ["pytest-benchmark", "pytest"] -dev = ["tox", "pre-commit"] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] [[package]] name = "ply" @@ -693,10 +708,10 @@ optional = false python-versions = ">=3.6" [package.extras] -tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] -docs = ["zope.interface", "sphinx-rtd-theme", "sphinx"] -dev = ["pre-commit", "mypy", "coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)", "cryptography (>=3.3.1)", "zope.interface", "sphinx-rtd-theme", "sphinx"] crypto = ["cryptography (>=3.3.1)"] +dev = ["sphinx", "sphinx-rtd-theme", "zope.interface", "cryptography (>=3.3.1)", "pytest (>=6.0.0,<7.0.0)", "coverage[toml] (==5.0.4)", "mypy", "pre-commit"] +docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"] +tests = ["pytest (>=6.0.0,<7.0.0)", "coverage[toml] (==5.0.4)"] [[package]] name = "pyparsing" @@ -901,8 +916,8 @@ python-versions = ">=3.6" sphinx = ">=1.8" [package.extras] -rtd = ["sphinx-book-theme", "myst-nb", "ipython", "sphinx"] code_style = ["pre-commit (==2.12.1)"] +rtd = ["sphinx", "ipython", "myst-nb", "sphinx-book-theme"] [[package]] name = "sphinx-rtd-theme" @@ -928,8 +943,8 @@ optional = true python-versions = ">=3.5" [package.extras] +lint = ["flake8", "mypy", "docutils-stubs"] test = ["pytest"] -lint = ["docutils-stubs", "mypy", "flake8"] [[package]] name = "sphinxcontrib-devhelp" @@ -940,8 +955,8 @@ optional = true python-versions = ">=3.5" [package.extras] +lint = ["flake8", "mypy", "docutils-stubs"] test = ["pytest"] -lint = ["docutils-stubs", "mypy", "flake8"] [[package]] name = "sphinxcontrib-htmlhelp" @@ -952,8 +967,8 @@ optional = true python-versions = ">=3.6" [package.extras] -test = ["html5lib", "pytest"] -lint = ["docutils-stubs", "mypy", "flake8"] +lint = ["flake8", "mypy", "docutils-stubs"] +test = ["pytest", "html5lib"] [[package]] name = "sphinxcontrib-jsmath" @@ -964,7 +979,7 @@ optional = true python-versions = ">=3.5" [package.extras] -test = ["mypy", "flake8", "pytest"] +test = ["pytest", "flake8", "mypy"] [[package]] name = "sphinxcontrib-qthelp" @@ -975,8 +990,8 @@ optional = true python-versions = ">=3.5" [package.extras] +lint = ["flake8", "mypy", "docutils-stubs"] test = ["pytest"] -lint = ["docutils-stubs", "mypy", "flake8"] [[package]] name = "sphinxcontrib-serializinghtml" @@ -1132,15 +1147,15 @@ python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4" six = "*" [package.extras] -tests = ["pytest-cov", "pytest", "typing", "pytest", "pytest-cov", "pytest", "pytest-cov", "pytest", "pytest", "pytest-cov", "pytest", "scikit-build", "pybind11", "ninja", "codecov", "cmake"] -tests-strict = ["pytest-cov (==3.0.0)", "pytest (==6.2.5)", "typing (==3.7.4)", "pytest (==4.6.0)", "pytest (==4.6.0)", "pytest-cov (==2.9.0)", "pytest (==4.6.0)", "pytest-cov (==2.8.1)", "pytest (==4.6.0)", "pytest (==4.6.0)", "pytest-cov (==2.8.1)", "scikit-build (==0.11.1)", "pybind11 (==2.7.1)", "ninja (==1.10.2)", "codecov (==2.0.15)", "cmake (==3.21.2)"] +all = ["cmake", "codecov", "ninja", "pybind11", "scikit-build", "six", "colorama", "pytest", "pytest-cov", "debugpy", "pytest", "pygments", "pytest", "pytest-cov", "pytest", "pytest-cov", "jupyter-client", "ipython", "debugpy", "ipykernel", "pytest", "debugpy", "debugpy", "typing", "debugpy", "ipython-genutils", "pytest", "pygments", "attrs", "jedi", "jinja2", "jupyter-core", "nbconvert", "pytest-cov", "jupyter-client", "ipython", "ipykernel"] +all-strict = ["cmake (==3.21.2)", "codecov (==2.0.15)", "ninja (==1.10.2)", "pybind11 (==2.7.1)", "scikit-build (==0.11.1)", "six (==1.11.0)", "colorama (==0.4.1)", "pytest-cov (==2.8.1)", "pytest (==4.6.0)", "debugpy (==1.3.0)", "pytest (==4.6.0)", "Pygments (==2.0.0)", "pytest-cov (==2.8.1)", "pytest (==4.6.0)", "pytest-cov (==2.9.0)", "pytest (==4.6.0)", "jupyter-client (==6.1.5)", "IPython (==7.10.0)", "debugpy (==1.0.0)", "ipykernel (==5.2.0)", "pytest (==4.6.0)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "typing (==3.7.4)", "debugpy (==1.6.0)", "ipython-genutils (==0.2.0)", "pytest (==6.2.5)", "Pygments (==2.4.1)", "attrs (==19.2.0)", "jedi (==0.16)", "jinja2 (==3.0.0)", "jupyter-core (==4.7.0)", "nbconvert (==6.0.0)", "pytest-cov (==3.0.0)", "jupyter-client (==7.0.0)", "IPython (==7.23.1)", "ipykernel (==6.0.0)"] +colors = ["colorama", "pygments", "pygments"] +jupyter = ["debugpy", "jupyter-client", "ipython", "debugpy", "ipykernel", "debugpy", "debugpy", "debugpy", "ipython-genutils", "attrs", "jedi", "jinja2", "jupyter-core", "nbconvert", "jupyter-client", "ipython", "ipykernel"] +optional-strict = ["colorama (==0.4.1)", "debugpy (==1.3.0)", "tomli (==0.2.0)", "Pygments (==2.0.0)", "jupyter-client (==6.1.5)", "IPython (==7.10.0)", "debugpy (==1.0.0)", "ipykernel (==5.2.0)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "debugpy (==1.6.0)", "ipython-genutils (==0.2.0)", "Pygments (==2.4.1)", "attrs (==19.2.0)", "jedi (==0.16)", "jinja2 (==3.0.0)", "jupyter-core (==4.7.0)", "nbconvert (==6.0.0)", "jupyter-client (==7.0.0)", "IPython (==7.23.1)", "ipykernel (==6.0.0)"] +optional = ["colorama", "debugpy", "tomli", "pygments", "jupyter-client", "ipython", "debugpy", "ipykernel", "debugpy", "debugpy", "debugpy", "ipython-genutils", "pygments", "attrs", "jedi", "jinja2", "jupyter-core", "nbconvert", "jupyter-client", "ipython", "ipykernel"] runtime-strict = ["six (==1.11.0)"] -optional = ["ipykernel", "ipython", "jupyter-client", "nbconvert", "jupyter-core", "jinja2", "jedi", "attrs", "pygments", "ipython-genutils", "debugpy", "debugpy", "debugpy", "ipykernel", "debugpy", "ipython", "jupyter-client", "pygments", "tomli", "debugpy", "colorama"] -optional-strict = ["ipykernel (==6.0.0)", "IPython (==7.23.1)", "jupyter-client (==7.0.0)", "nbconvert (==6.0.0)", "jupyter-core (==4.7.0)", "jinja2 (==3.0.0)", "jedi (==0.16)", "attrs (==19.2.0)", "Pygments (==2.4.1)", "ipython-genutils (==0.2.0)", "debugpy (==1.6.0)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "ipykernel (==5.2.0)", "debugpy (==1.0.0)", "IPython (==7.10.0)", "jupyter-client (==6.1.5)", "Pygments (==2.0.0)", "tomli (==0.2.0)", "debugpy (==1.3.0)", "colorama (==0.4.1)"] -jupyter = ["ipykernel", "ipython", "jupyter-client", "nbconvert", "jupyter-core", "jinja2", "jedi", "attrs", "ipython-genutils", "debugpy", "debugpy", "debugpy", "ipykernel", "debugpy", "ipython", "jupyter-client", "debugpy"] -colors = ["pygments", "pygments", "colorama"] -all = ["ipykernel", "ipython", "jupyter-client", "pytest-cov", "nbconvert", "jupyter-core", "jinja2", "jedi", "attrs", "pygments", "pytest", "ipython-genutils", "debugpy", "typing", "debugpy", "debugpy", "pytest", "ipykernel", "debugpy", "ipython", "jupyter-client", "pytest-cov", "pytest", "pytest-cov", "pytest", "pygments", "pytest", "debugpy", "pytest-cov", "pytest", "colorama", "six", "scikit-build", "pybind11", "ninja", "codecov", "cmake"] -all-strict = ["ipykernel (==6.0.0)", "IPython (==7.23.1)", "jupyter-client (==7.0.0)", "pytest-cov (==3.0.0)", "nbconvert (==6.0.0)", "jupyter-core (==4.7.0)", "jinja2 (==3.0.0)", "jedi (==0.16)", "attrs (==19.2.0)", "Pygments (==2.4.1)", "pytest (==6.2.5)", "ipython-genutils (==0.2.0)", "debugpy (==1.6.0)", "typing (==3.7.4)", "debugpy (==1.0.0)", "debugpy (==1.0.0)", "pytest (==4.6.0)", "ipykernel (==5.2.0)", "debugpy (==1.0.0)", "IPython (==7.10.0)", "jupyter-client (==6.1.5)", "pytest (==4.6.0)", "pytest-cov (==2.9.0)", "pytest (==4.6.0)", "pytest-cov (==2.8.1)", "Pygments (==2.0.0)", "pytest (==4.6.0)", "debugpy (==1.3.0)", "pytest (==4.6.0)", "pytest-cov (==2.8.1)", "colorama (==0.4.1)", "six (==1.11.0)", "scikit-build (==0.11.1)", "pybind11 (==2.7.1)", "ninja (==1.10.2)", "codecov (==2.0.15)", "cmake (==3.21.2)"] +tests = ["cmake", "codecov", "ninja", "pybind11", "scikit-build", "pytest", "pytest-cov", "pytest", "pytest", "pytest-cov", "pytest", "pytest-cov", "pytest", "typing", "pytest", "pytest-cov"] +tests-strict = ["cmake (==3.21.2)", "codecov (==2.0.15)", "ninja (==1.10.2)", "pybind11 (==2.7.1)", "scikit-build (==0.11.1)", "pytest-cov (==2.8.1)", "pytest (==4.6.0)", "pytest (==4.6.0)", "pytest-cov (==2.8.1)", "pytest (==4.6.0)", "pytest-cov (==2.9.0)", "pytest (==4.6.0)", "pytest (==4.6.0)", "typing (==3.7.4)", "pytest (==6.2.5)", "pytest-cov (==3.0.0)"] [[package]] name = "zipp" @@ -1160,7 +1175,7 @@ docs = ["sphinx", "sphinx-rtd-theme", "sphinx-copybutton", "myst-parser"] [metadata] lock-version = "1.1" python-versions = "<3.11,>=3.7.1" -content-hash = "211559dd38a497cc8223d0ee4fad9f824c438887a1b5b8cfe369e28e0c115c51" +content-hash = "245bfec5a4d1042748380774a614a7ba4bc53144dfac716a7befd4f8ad8631ce" [metadata.files] alabaster = [ @@ -1459,6 +1474,10 @@ importlib-metadata = [ {file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"}, {file = "importlib_metadata-4.12.0.tar.gz", hash = "sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670"}, ] +importlib-resources = [ + {file = "importlib_resources-5.9.0-py3-none-any.whl", hash = "sha256:f78a8df21a79bcc30cfd400bdc38f314333de7c0fb619763f6b9dabab8268bb7"}, + {file = "importlib_resources-5.9.0.tar.gz", hash = "sha256:5481e97fb45af8dcf2f798952625591c58fe599d0735d86b10f54de086a61681"}, +] inflection = [ {file = "inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2"}, {file = "inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417"}, diff --git a/pyproject.toml b/pyproject.toml index d4d0c7e899..587322ba24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ PyJWT = "~=2.4" requests = "^2.25.1" cryptography = ">=3.4.6,<38.0.0" importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +importlib-resources = {version = "^5.9.0", markers = "python_version < \"3.9\""} memoization = ">=0.3.2,<0.5.0" jsonpath-ng = "^1.5.3" joblib = "^1.0.1" diff --git a/samples/__init__.py b/samples/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/samples/aapl/__init__.py b/samples/aapl/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/samples/aapl/aapl.py b/samples/aapl/aapl.py index db1c5b4e89..1785ae2aab 100644 --- a/samples/aapl/aapl.py +++ b/samples/aapl/aapl.py @@ -1,11 +1,12 @@ """A simple tap with one big record and schema.""" import json -from pathlib import Path +from samples import aapl from singer_sdk import Stream, Tap +from singer_sdk.helpers._util import get_package_files -PROJECT_DIR = Path(__file__).parent +PROJECT_DIR = get_package_files(aapl) class AAPL(Stream): diff --git a/samples/sample_tap_countries/countries_streams.py b/samples/sample_tap_countries/countries_streams.py index 08186a7bda..5388cf17fc 100644 --- a/samples/sample_tap_countries/countries_streams.py +++ b/samples/sample_tap_countries/countries_streams.py @@ -7,13 +7,12 @@ """ import abc -from pathlib import Path +from samples.sample_tap_countries import schemas from singer_sdk import typing as th +from singer_sdk.helpers._util import get_package_files from singer_sdk.streams.graphql import GraphQLStream -SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") - class CountriesAPIStream(GraphQLStream, metaclass=abc.ABCMeta): """Sample tap test for countries. @@ -80,7 +79,7 @@ class ContinentsStream(CountriesAPIStream): name = "continents" primary_keys = ["code"] - schema_filepath = SCHEMAS_DIR / "continents.json" + schema_filepath = get_package_files(schemas, "continents.json") query = """ continents { code diff --git a/samples/sample_tap_countries/schemas/__init__.py b/samples/sample_tap_countries/schemas/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/samples/sample_tap_gitlab/gitlab_graphql_streams.py b/samples/sample_tap_gitlab/gitlab_graphql_streams.py index 53bce85a12..4e42189f71 100644 --- a/samples/sample_tap_gitlab/gitlab_graphql_streams.py +++ b/samples/sample_tap_gitlab/gitlab_graphql_streams.py @@ -4,13 +4,15 @@ # - https://gitlab.com/-/graphql-explorer """ -from pathlib import Path +import sys +from samples.sample_tap_gitlab import schemas +from singer_sdk.helpers._util import get_package_files from singer_sdk.streams import GraphQLStream SITE_URL = "https://gitlab.com/graphql" -SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") +SCHEMAS_DIR = get_package_files(schemas) class GitlabGraphQLStream(GraphQLStream): diff --git a/samples/sample_tap_gitlab/gitlab_rest_streams.py b/samples/sample_tap_gitlab/gitlab_rest_streams.py index 425348e6e1..58b1f22f78 100644 --- a/samples/sample_tap_gitlab/gitlab_rest_streams.py +++ b/samples/sample_tap_gitlab/gitlab_rest_streams.py @@ -1,11 +1,12 @@ """Sample tap stream test for tap-gitlab.""" -from pathlib import Path from typing import Any, Dict, List, Optional, cast import requests +from samples.sample_tap_gitlab import schemas from singer_sdk.authenticators import SimpleAuthenticator +from singer_sdk.helpers._util import get_package_files from singer_sdk.streams.rest import RESTStream from singer_sdk.typing import ( ArrayType, @@ -16,7 +17,7 @@ StringType, ) -SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") +SCHEMAS_DIR = get_package_files(schemas) DEFAULT_URL_BASE = "https://gitlab.com/api/v4" diff --git a/samples/sample_tap_gitlab/schemas/__init__.py b/samples/sample_tap_gitlab/schemas/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/samples/sample_tap_google_analytics/ga_tap_stream.py b/samples/sample_tap_google_analytics/ga_tap_stream.py index 5ff4fbfd92..79b0953880 100644 --- a/samples/sample_tap_google_analytics/ga_tap_stream.py +++ b/samples/sample_tap_google_analytics/ga_tap_stream.py @@ -1,16 +1,19 @@ """Sample tap stream test for tap-google-analytics.""" +import sys from pathlib import Path from typing import Any, Iterable, List, Optional, cast import pendulum +from samples.sample_tap_google_analytics import schemas from singer_sdk.authenticators import OAuthJWTAuthenticator +from singer_sdk.helpers._util import get_package_files from singer_sdk.streams import RESTStream GOOGLE_OAUTH_ENDPOINT = "https://oauth2.googleapis.com/token" GA_OAUTH_SCOPES = "https://www.googleapis.com/auth/analytics.readonly" -SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") +SCHEMAS_DIR = get_package_files(schemas) class GoogleJWTAuthenticator(OAuthJWTAuthenticator): diff --git a/samples/sample_tap_google_analytics/schemas/__init__.py b/samples/sample_tap_google_analytics/schemas/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/singer_sdk/helpers/_util.py b/singer_sdk/helpers/_util.py index 67bc8e8162..5ca1cb1c2b 100644 --- a/singer_sdk/helpers/_util.py +++ b/singer_sdk/helpers/_util.py @@ -1,11 +1,19 @@ """General helper functions, helper classes, and decorators.""" import json +import sys from pathlib import Path, PurePath from typing import Any, Dict, Union, cast import pendulum +if sys.version_info >= (3, 9): + import importlib.resources as resources + from importlib.abc import Traversable +else: + import importlib_resources as resources + from importlib_resources.abc import Traversable + def read_json_file(path: Union[PurePath, str]) -> Dict[str, Any]: """Read json file, thowing an error if missing.""" @@ -25,3 +33,15 @@ def read_json_file(path: Union[PurePath, str]) -> Dict[str, Any]: def utc_now() -> pendulum.DateTime: """Return current time in UTC.""" return pendulum.now(tz="UTC") + + +def get_package_files(package: resources.Package, resource: str) -> Traversable: + """Load a static files path from a package. + + This is a wrapper around importlib.resources.files(). + + Args: + package: The package to load the resource from. + resource: The name of the resource to load. + """ + return resources.files(package) diff --git a/singer_sdk/streams/core.py b/singer_sdk/streams/core.py index cbf46a3609..cdf77b0fb5 100644 --- a/singer_sdk/streams/core.py +++ b/singer_sdk/streams/core.py @@ -1,28 +1,18 @@ """Stream abstract class.""" +from __future__ import annotations + import abc import copy import datetime import json import logging +import sys from os import PathLike from pathlib import Path from types import MappingProxyType -from typing import ( - Any, - Callable, - Dict, - Generator, - Iterable, - List, - Mapping, - Optional, - Tuple, - Type, - TypeVar, - Union, - cast, -) +from typing import Any, Callable, Generator, Iterable, Mapping, TypeVar, cast +from warnings import warn import pendulum import requests @@ -56,6 +46,12 @@ from singer_sdk.mapper import RemoveRecordTransform, SameRecordTransform, StreamMap from singer_sdk.plugin_base import PluginBase as TapBaseClass +if sys.version_info >= (3, 9): + from importlib.abc import Traversable +else: + from importlib_resources.abc import Traversable + + # Replication methods REPLICATION_FULL_TABLE = "FULL_TABLE" REPLICATION_INCREMENTAL = "INCREMENTAL" @@ -70,20 +66,20 @@ class Stream(metaclass=abc.ABCMeta): """Abstract base class for tap streams.""" STATE_MSG_FREQUENCY = 10000 # Number of records between state messages - _MAX_RECORDS_LIMIT: Optional[int] = None + _MAX_RECORDS_LIMIT: int | None = None # Used for nested stream relationships - parent_stream_type: Optional[Type["Stream"]] = None + parent_stream_type: type[Stream] | None = None ignore_parent_replication_key: bool = False # Internal API cost aggregator - _sync_costs: Dict[str, int] = {} + _sync_costs: dict[str, int] = {} def __init__( self, tap: TapBaseClass, - schema: Optional[Union[str, PathLike, Dict[str, Any], Schema]] = None, - name: Optional[str] = None, + schema: None | (str | PathLike | dict[str, Any] | Schema | Traversable) = None, + name: str | None = None, ) -> None: """Init tap stream. @@ -106,25 +102,32 @@ def __init__( self._config: dict = dict(tap.config) self._tap = tap self._tap_state = tap.state - self._tap_input_catalog: Optional[Catalog] = None - self._stream_maps: Optional[List[StreamMap]] = None - self.forced_replication_method: Optional[str] = None - self._replication_key: Optional[str] = None - self._primary_keys: Optional[List[str]] = None - self._state_partitioning_keys: Optional[List[str]] = None - self._schema_filepath: Optional[Path] = None - self._metadata: Optional[MetadataMapping] = None - self._mask: Optional[SelectionMask] = None + self._tap_input_catalog: Catalog | None = None + self._stream_maps: list[StreamMap] | None = None + self.forced_replication_method: str | None = None + self._replication_key: str | None = None + self._primary_keys: list[str] | None = None + self._state_partitioning_keys: list[str] | None = None + self._schema_filepath: Path | Traversable | None = None + self._metadata: MetadataMapping | None = None + self._mask: SelectionMask | None = None self._schema: dict - self.child_streams: List[Stream] = [] + self.child_streams: list[Stream] = [] if schema: if isinstance(schema, (PathLike, str)): if not Path(schema).is_file(): raise FileNotFoundError( f"Could not find schema file '{self.schema_filepath}'." ) + warn( + "Passing a string or pathlib.Path schema filepath is deprecated. " + + "Use importlib.resources.files() instead.", + DeprecationWarning, + ) self._schema_filepath = Path(schema) + elif isinstance(schema, Traversable): + self._schema_filepath = schema elif isinstance(schema, dict): self._schema = schema elif isinstance(schema, Schema): @@ -135,7 +138,7 @@ def __init__( ) if self.schema_filepath: - self._schema = json.loads(Path(self.schema_filepath).read_text()) + self._schema = json.loads(self.schema_filepath.read_text()) if not self.schema: raise ValueError( @@ -144,7 +147,7 @@ def __init__( ) @property - def stream_maps(self) -> List[StreamMap]: + def stream_maps(self) -> list[StreamMap]: """Get stream transformation maps. The 0th item is the primary stream map. List should not be empty. @@ -191,9 +194,7 @@ def is_timestamp_replication_key(self) -> bool: type_dict = self.schema.get("properties", {}).get(self.replication_key) return is_datetime_type(type_dict) - def get_starting_replication_key_value( - self, context: Optional[dict] - ) -> Optional[Any]: + def get_starting_replication_key_value(self, context: dict | None) -> Any | None: """Get starting replication key. Will return the value of the stream's replication key when `--state` is passed. @@ -213,9 +214,7 @@ def get_starting_replication_key_value( return get_starting_replication_value(state) - def get_starting_timestamp( - self, context: Optional[dict] - ) -> Optional[datetime.datetime]: + def get_starting_timestamp(self, context: dict | None) -> datetime.datetime | None: """Get starting replication timestamp. Will return the value of the stream's replication key when `--state` is passed. @@ -273,21 +272,21 @@ def has_selected_descendents(self) -> bool: @final @property - def descendent_streams(self) -> List["Stream"]: + def descendent_streams(self) -> list[Stream]: """Get child streams. Returns: A list of all children, recursively. """ - result: List[Stream] = list(self.child_streams) or [] + result: list[Stream] = list(self.child_streams) or [] for child in self.child_streams: result += child.descendent_streams or [] return result def _write_replication_key_signpost( self, - context: Optional[dict], - value: Union[datetime.datetime, str, int, float], + context: dict | None, + value: datetime.datetime | str | int | float, ) -> None: """Write the signpost value, if available. @@ -304,7 +303,7 @@ def _write_replication_key_signpost( state = self.get_context_state(context) write_replication_key_signpost(state, value) - def _write_starting_replication_value(self, context: Optional[dict]) -> None: + def _write_starting_replication_value(self, context: dict | None) -> None: """Write the starting replication value, if available. Args: @@ -326,8 +325,8 @@ def _write_starting_replication_value(self, context: Optional[dict]) -> None: write_starting_replication_value(state, value) def get_replication_key_signpost( - self, context: Optional[dict] - ) -> Optional[Union[datetime.datetime, Any]]: + self, context: dict | None + ) -> datetime.datetime | Any | None: """Get the replication signpost. For timestamp-based replication keys, this defaults to `utc_now()`. For @@ -351,7 +350,7 @@ def get_replication_key_signpost( return None @property - def schema_filepath(self) -> Optional[Path]: + def schema_filepath(self) -> Path | Traversable | None: """Get path to schema file. Returns: @@ -369,7 +368,7 @@ def schema(self) -> dict: return self._schema @property - def primary_keys(self) -> Optional[List[str]]: + def primary_keys(self) -> list[str] | None: """Get primary keys. Returns: @@ -380,7 +379,7 @@ def primary_keys(self) -> Optional[List[str]]: return self._primary_keys @primary_keys.setter - def primary_keys(self, new_value: List[str]) -> None: + def primary_keys(self, new_value: list[str]) -> None: """Set primary key(s) for the stream. Args: @@ -389,7 +388,7 @@ def primary_keys(self, new_value: List[str]) -> None: self._primary_keys = new_value @property - def state_partitioning_keys(self) -> Optional[List[str]]: + def state_partitioning_keys(self) -> list[str] | None: """Get state partition keys. If not set, a default partitioning will be inherited from the stream's context. @@ -401,7 +400,7 @@ def state_partitioning_keys(self) -> Optional[List[str]]: return self._state_partitioning_keys @state_partitioning_keys.setter - def state_partitioning_keys(self, new_value: Optional[List[str]]) -> None: + def state_partitioning_keys(self, new_value: list[str] | None) -> None: """Set partition keys for the stream state bookmarks. If not set, a default partitioning will be inherited from the stream's context. @@ -413,7 +412,7 @@ def state_partitioning_keys(self, new_value: Optional[List[str]]) -> None: self._state_partitioning_keys = new_value @property - def replication_key(self) -> Optional[str]: + def replication_key(self) -> str | None: """Get replication key. Returns: @@ -578,7 +577,7 @@ def tap_state(self) -> dict: """ return self._tap_state - def get_context_state(self, context: Optional[dict]) -> dict: + def get_context_state(self, context: dict | None) -> dict: """Return a writable state dict for the given context. Gives a partitioned context state if applicable; else returns stream state. @@ -633,7 +632,7 @@ def stream_state(self) -> dict: # Partitions @property - def partitions(self) -> Optional[List[dict]]: + def partitions(self) -> list[dict] | None: """Get stream partitions. Developers may override this property to provide a default partitions list. @@ -644,7 +643,7 @@ def partitions(self) -> Optional[List[dict]]: Returns: A list of partition key dicts (if applicable), otherwise `None`. """ - result: List[dict] = [] + result: list[dict] = [] for partition_state in ( get_state_partitions_list(self.tap_state, self.name) or [] ): @@ -654,7 +653,7 @@ def partitions(self) -> Optional[List[dict]]: # Private bookmarking methods def _increment_stream_state( - self, latest_record: Dict[str, Any], *, context: Optional[dict] = None + self, latest_record: dict[str, Any], *, context: dict | None = None ) -> None: """Update state of stream or partition with data from the provided record. @@ -776,7 +775,7 @@ def _write_record_message(self, record: dict) -> None: singer.write_message(record_message) @property - def _metric_logging_function(self) -> Optional[Callable]: + def _metric_logging_function(self) -> Callable | None: """Return the metrics logging function. Returns: @@ -802,7 +801,7 @@ def _metric_logging_function(self) -> Optional[Callable]: + self.config[METRICS_LOG_LEVEL_SETTING] ) - def _write_metric_log(self, metric: dict, extra_tags: Optional[dict]) -> None: + def _write_metric_log(self, metric: dict, extra_tags: dict | None) -> None: """Emit a metric log. Optionally with appended tag info. Args: @@ -819,9 +818,7 @@ def _write_metric_log(self, metric: dict, extra_tags: Optional[dict]) -> None: metric["tags"].update(extra_tags) self._metric_logging_function(f"INFO METRIC: {json.dumps(metric)}") - def _write_record_count_log( - self, record_count: int, context: Optional[dict] - ) -> None: + def _write_record_count_log(self, record_count: int, context: dict | None) -> None: """Emit a metric log. Optionally with appended tag info. Args: @@ -829,7 +826,7 @@ def _write_record_count_log( context: Stream partition or context dictionary. """ extra_tags = {} if not context else {"context": context} - counter_metric: Dict[str, Any] = { + counter_metric: dict[str, Any] = { "type": "counter", "metric": "record_count", "value": record_count, @@ -841,8 +838,8 @@ def _write_request_duration_log( self, endpoint: str, response: requests.Response, - context: Optional[dict], - extra_tags: Optional[dict], + context: dict | None, + extra_tags: dict | None, ) -> None: """TODO. @@ -852,7 +849,7 @@ def _write_request_duration_log( context: Stream partition or context dictionary. extra_tags: TODO """ - request_duration_metric: Dict[str, Any] = { + request_duration_metric: dict[str, Any] = { "type": "timer", "metric": "http_request_duration", "value": response.elapsed.total_seconds(), @@ -899,7 +896,7 @@ def _check_max_record_limit(self, record_count: int) -> None: # Handle interim stream state - def reset_state_progress_markers(self, state: Optional[dict] = None) -> None: + def reset_state_progress_markers(self, state: dict | None = None) -> None: """Reset progress markers. If all=True, all state contexts will be set. This method is internal to the SDK and should not need to be overridden. @@ -908,7 +905,7 @@ def reset_state_progress_markers(self, state: Optional[dict] = None) -> None: state: State object to promote progress markers with. """ if state is None or state == {}: - context: Optional[dict] + context: dict | None for context in self.partitions or [{}]: context = context or None state = self.get_context_state(context) @@ -917,7 +914,7 @@ def reset_state_progress_markers(self, state: Optional[dict] = None) -> None: reset_state_progress_markers(state) - def finalize_state_progress_markers(self, state: Optional[dict] = None) -> None: + def finalize_state_progress_markers(self, state: dict | None = None) -> None: """Reset progress markers. If all=True, all state contexts will be finalized. This method is internal to the SDK and should not need to be overridden. @@ -931,7 +928,7 @@ def finalize_state_progress_markers(self, state: Optional[dict] = None) -> None: for child_stream in self.child_streams or []: child_stream.finalize_state_progress_markers() - context: Optional[dict] + context: dict | None for context in self.partitions or [{}]: context = context or None state = self.get_context_state(context) @@ -943,7 +940,7 @@ def finalize_state_progress_markers(self, state: Optional[dict] = None) -> None: # Private sync methods: def _sync_records( # noqa C901 # too complex - self, context: Optional[dict] = None + self, context: dict | None = None ) -> None: """Sync records, emitting RECORD and STATE messages. @@ -954,8 +951,8 @@ def _sync_records( # noqa C901 # too complex InvalidStreamSortException: TODO """ record_count = 0 - current_context: Optional[dict] - context_list: Optional[List[dict]] + current_context: dict | None + context_list: list[dict] | None context_list = [context] if context is not None else self.partitions selected = self.selected @@ -965,7 +962,7 @@ def _sync_records( # noqa C901 # too complex state = self.get_context_state(current_context) state_partition_context = self._get_state_partition_context(current_context) self._write_starting_replication_value(current_context) - child_context: Optional[dict] = ( + child_context: dict | None = ( None if current_context is None else copy.copy(current_context) ) for record_result in self.get_records(current_context): @@ -1020,7 +1017,7 @@ def _sync_records( # noqa C901 # too complex # Public methods ("final", not recommended to be overridden) @final - def sync(self, context: Optional[dict] = None) -> None: + def sync(self, context: dict | None = None) -> None: """Sync this stream. This method is internal to the SDK and should not need to be overridden. @@ -1071,7 +1068,7 @@ def apply_catalog(self, catalog: Catalog) -> None: if catalog_entry.replication_method: self.forced_replication_method = catalog_entry.replication_method - def _get_state_partition_context(self, context: Optional[dict]) -> Optional[Dict]: + def _get_state_partition_context(self, context: dict | None) -> dict | None: """Override state handling if Stream.state_partitioning_keys is specified. Args: @@ -1088,7 +1085,7 @@ def _get_state_partition_context(self, context: Optional[dict]) -> Optional[Dict return {k: v for k, v in context.items() if k in self.state_partitioning_keys} - def get_child_context(self, record: dict, context: Optional[dict]) -> dict: + def get_child_context(self, record: dict, context: dict | None) -> dict: """Return a child context object from the record and optional provided context. By default, will return context if provided and otherwise the record dict. @@ -1125,9 +1122,7 @@ def get_child_context(self, record: dict, context: Optional[dict]) -> dict: # Abstract Methods @abc.abstractmethod - def get_records( - self, context: Optional[dict] - ) -> Iterable[Union[dict, Tuple[dict, dict]]]: + def get_records(self, context: dict | None) -> Iterable[dict | tuple[dict, dict]]: """Abstract row generator function. Must be overridden by the child class. Each row emitted should be a dictionary of property names to their values. @@ -1153,7 +1148,7 @@ def get_records( """ pass - def post_process(self, row: dict, context: Optional[dict] = None) -> Optional[dict]: + def post_process(self, row: dict, context: dict | None = None) -> dict | None: """As needed, append or transform raw data to match expected structure. Optional. This method gives developers an opportunity to "clean up" the results