From 874f7f72b237fc2ca282f40881c0e08db36c4bef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= <edgarrm358@gmail.com> Date: Fri, 16 Sep 2022 22:04:11 -0500 Subject: [PATCH 1/5] Refactor singer library code into singer_sdk._singerlib --- .flake8 | 2 + noxfile.py | 2 + poetry.lock | 203 ++++++++----- pyproject.toml | 8 +- samples/sample_mapper/mapper.py | 3 +- singer_sdk/_singerlib/__init__.py | 19 ++ .../_singer.py => _singerlib/catalog.py} | 226 +++++++++++---- singer_sdk/_singerlib/messages.py | 175 ++++++++++++ .../_schema.py => _singerlib/schema.py} | 71 ++--- singer_sdk/authenticators.py | 3 +- singer_sdk/helpers/_batch.py | 33 +-- singer_sdk/helpers/_catalog.py | 2 +- singer_sdk/io_base.py | 2 +- singer_sdk/mapper.py | 2 +- singer_sdk/mapper_base.py | 2 +- singer_sdk/streams/core.py | 48 ++-- singer_sdk/streams/rest.py | 2 +- singer_sdk/streams/sql.py | 5 +- singer_sdk/tap_base.py | 6 +- singer_sdk/testing.py | 4 +- tests/_singerlib/test_catalog.py | 267 ++++++++++++++++++ tests/_singerlib/test_messages.py | 139 +++++++++ tests/_singerlib/test_schema.py | 86 ++++++ tests/core/test_catalog_selection.py | 5 +- tests/core/test_mapper.py | 2 +- tests/core/test_schema.py | 14 +- tests/core/test_singer.py | 118 -------- tests/core/test_singer_messages.py | 6 +- tests/core/test_sqlite.py | 2 +- tests/core/test_streams.py | 2 +- tests/external/test_gitlab_sync.py | 2 +- 31 files changed, 1084 insertions(+), 377 deletions(-) create mode 100644 singer_sdk/_singerlib/__init__.py rename singer_sdk/{helpers/_singer.py => _singerlib/catalog.py} (58%) create mode 100644 singer_sdk/_singerlib/messages.py rename singer_sdk/{helpers/_schema.py => _singerlib/schema.py} (59%) create mode 100644 tests/_singerlib/test_catalog.py create mode 100644 tests/_singerlib/test_messages.py create mode 100644 tests/_singerlib/test_schema.py delete mode 100644 tests/core/test_singer.py diff --git a/.flake8 b/.flake8 index ed8440b87..1ae5c448f 100644 --- a/.flake8 +++ b/.flake8 @@ -11,6 +11,8 @@ per-file-ignores = singer_sdk/helpers/_*.py:ANN,DAR,D105 # Don't require docstrings conventions in "meta" code # singer_sdk/helpers/_classproperty.py:D105 + # Ignore unused imports in __init__.py files + singer_sdk/_singerlib/__init__.py:F401 max-complexity = 10 docstring-convention = google allow-star-arg-any = true diff --git a/noxfile.py b/noxfile.py index 44d50b586..f7cc2be2c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -40,6 +40,8 @@ def mypy(session: Session) -> None: "sqlalchemy2-stubs", "types-python-dateutil", "types-requests", + "types-pytz", + "types-simplejson", ) session.run("mypy", *args) if not session.posargs: diff --git a/poetry.lock b/poetry.lock index e5d154bb5..e9666af14 100644 --- a/poetry.lock +++ b/poetry.lock @@ -53,11 +53,11 @@ pytz = ">=2015.7" [[package]] name = "backoff" -version = "1.8.0" +version = "1.11.1" description = "Function decoration for backoff and retry" category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" [[package]] name = "binaryornot" @@ -131,14 +131,6 @@ python-versions = ">=3.6.0" [package.extras] unicode_backport = ["unicodedata2"] -[[package]] -name = "ciso8601" -version = "2.2.0" -description = "Fast ISO8601 date time parser for Python written in C" -category = "main" -optional = false -python-versions = "*" - [[package]] name = "click" version = "8.1.3" @@ -375,6 +367,21 @@ docs = ["jaraco.packaging (>=9)", "rst.linker (>=1.9)", "sphinx"] perf = ["ipython"] testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] +[[package]] +name = "importlib-resources" +version = "5.9.0" +description = "Read resources from Python packages" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"] +testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + [[package]] name = "inflection" version = "0.5.1" @@ -440,22 +447,23 @@ six = "*" [[package]] name = "jsonschema" -version = "3.2.0" +version = "4.16.0" description = "An implementation of JSON Schema validation for Python" category = "main" optional = false -python-versions = "*" +python-versions = ">=3.7" [package.dependencies] attrs = ">=17.4.0" importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} -pyrsistent = ">=0.14.0" -setuptools = "*" -six = ">=1.11.0" +importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} +pkgutil-resolve-name = {version = ">=1.3.10", markers = "python_version < \"3.9\""} +pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2" +typing-extensions = {version = "*", markers = "python_version < \"3.8\""} [package.extras] -format = ["idna", "jsonpointer (>1.13)", "rfc3987", "strict-rfc3339", "webcolors"] -format_nongpl = ["idna", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "webcolors"] +format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] [[package]] name = "livereload" @@ -637,23 +645,12 @@ python-dateutil = ">=2.6,<3.0" pytzdata = ">=2020.1" [[package]] -name = "pipelinewise-singer-python" -version = "1.2.0" -description = "Singer.io utility library - PipelineWise compatible" +name = "pkgutil_resolve_name" +version = "1.3.10" +description = "Resolve a name to an object." category = "main" optional = false -python-versions = "*" - -[package.dependencies] -backoff = "1.8.0" -ciso8601 = "*" -jsonschema = "3.2.0" -python-dateutil = ">=2.6.0" -pytz = "<2021.0" -simplejson = "3.11.1" - -[package.extras] -dev = ["ipdb", "ipython", "nose", "pylint"] +python-versions = ">=3.6" [[package]] name = "platformdirs" @@ -882,7 +879,7 @@ unidecode = ["Unidecode (>=1.1.1)"] [[package]] name = "pytz" -version = "2020.5" +version = "2022.2.1" description = "World timezone definitions, modern and historical" category = "main" optional = false @@ -953,11 +950,11 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( [[package]] name = "simplejson" -version = "3.11.1" +version = "3.17.6" description = "Simple, fast, extensible JSON encoder/decoder for Python" category = "main" optional = false -python-versions = "*" +python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" [[package]] name = "six" @@ -1208,6 +1205,14 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "types-pytz" +version = "2022.2.1.0" +description = "Typing stubs for pytz" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "types-requests" version = "2.28.10" @@ -1219,6 +1224,14 @@ python-versions = "*" [package.dependencies] types-urllib3 = "<1.27" +[[package]] +name = "types-simplejson" +version = "3.17.7" +description = "Typing stubs for simplejson" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "types-urllib3" version = "1.26.23" @@ -1310,7 +1323,7 @@ docs = ["sphinx", "sphinx-rtd-theme", "sphinx-copybutton", "myst-parser", "sphin [metadata] lock-version = "1.1" python-versions = "<3.11,>=3.7.1" -content-hash = "78dd958153254f99eda34f3925bec4ec440324dcb26edf91035cc1ed6d84358c" +content-hash = "875bec45c581e109e60f69cb7912213df86da474c0cca43bf8ba6a5ac01dfcb9" [metadata.files] alabaster = [ @@ -1334,8 +1347,8 @@ babel = [ {file = "Babel-2.10.3.tar.gz", hash = "sha256:7614553711ee97490f732126dc077f8d0ae084ebc6a96e23db1482afabdb2c51"}, ] backoff = [ - {file = "backoff-1.8.0-py2.py3-none-any.whl", hash = "sha256:d340bb6f36d025c04214b8925112d8456970e5f28dda46e4f1133bf5c622cb0a"}, - {file = "backoff-1.8.0.tar.gz", hash = "sha256:c7187f15339e775aec926dc6e5e42f8a3ad7d3c2b9a6ecae7b535000f70cd838"}, + {file = "backoff-1.11.1-py2.py3-none-any.whl", hash = "sha256:61928f8fa48d52e4faa81875eecf308eccfb1016b018bb6bd21e05b5d90a96c5"}, + {file = "backoff-1.11.1.tar.gz", hash = "sha256:ccb962a2378418c667b3c979b504fdeb7d9e0d29c0579e3b13b86467177728cb"}, ] binaryornot = [ {file = "binaryornot-0.4.4-py2.py3-none-any.whl", hash = "sha256:b8b71173c917bddcd2c16070412e369c3ed7f0528926f70cac18a6c97fd563e4"}, @@ -1444,9 +1457,6 @@ charset-normalizer = [ {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, ] -ciso8601 = [ - {file = "ciso8601-2.2.0.tar.gz", hash = "sha256:14ad817ed31a698372d42afa81b0173d71cd1d0b48b7499a2da2a01dcc8695e6"}, -] click = [ {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, @@ -1645,6 +1655,10 @@ importlib-metadata = [ {file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"}, {file = "importlib_metadata-4.12.0.tar.gz", hash = "sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670"}, ] +importlib-resources = [ + {file = "importlib_resources-5.9.0-py3-none-any.whl", hash = "sha256:f78a8df21a79bcc30cfd400bdc38f314333de7c0fb619763f6b9dabab8268bb7"}, + {file = "importlib_resources-5.9.0.tar.gz", hash = "sha256:5481e97fb45af8dcf2f798952625591c58fe599d0735d86b10f54de086a61681"}, +] inflection = [ {file = "inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2"}, {file = "inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417"}, @@ -1671,8 +1685,8 @@ jsonpath-ng = [ {file = "jsonpath_ng-1.5.3-py3-none-any.whl", hash = "sha256:292a93569d74029ba75ac2dc3d3630fc0e17b2df26119a165fa1d498ca47bf65"}, ] jsonschema = [ - {file = "jsonschema-3.2.0-py2.py3-none-any.whl", hash = "sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163"}, - {file = "jsonschema-3.2.0.tar.gz", hash = "sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a"}, + {file = "jsonschema-4.16.0-py3-none-any.whl", hash = "sha256:9e74b8f9738d6a946d70705dc692b74b5429cd0960d58e79ffecfc43b2221eb9"}, + {file = "jsonschema-4.16.0.tar.gz", hash = "sha256:165059f076eff6971bae5b742fc029a7b4ef3f9bcf04c14e4776a7605de14b23"}, ] livereload = [ {file = "livereload-2.6.3.tar.gz", hash = "sha256:776f2f865e59fde56490a56bcc6773b6917366bce0c267c60ee8aaf1a0959869"}, @@ -1839,9 +1853,9 @@ pendulum = [ {file = "pendulum-2.1.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:94b1fc947bfe38579b28e1cccb36f7e28a15e841f30384b5ad6c5e31055c85d7"}, {file = "pendulum-2.1.2.tar.gz", hash = "sha256:b06a0ca1bfe41c990bbf0c029f0b6501a7f2ec4e38bfec730712015e8860f207"}, ] -pipelinewise-singer-python = [ - {file = "pipelinewise-singer-python-1.2.0.tar.gz", hash = "sha256:8ba501f9092dbd686cd5792ecf6aa97c2d25c225e9d8b2875dcead0f5738898c"}, - {file = "pipelinewise_singer_python-1.2.0-py3-none-any.whl", hash = "sha256:156f011cba10b1591ae37c5510ed9d21639258c1377cc00c07d9f7e9a3ae27fb"}, +pkgutil_resolve_name = [ + {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, + {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, ] platformdirs = [ {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"}, @@ -1971,8 +1985,8 @@ python-slugify = [ {file = "python_slugify-6.1.2-py2.py3-none-any.whl", hash = "sha256:7b2c274c308b62f4269a9ba701aa69a797e9bca41aeee5b3a9e79e36b6656927"}, ] pytz = [ - {file = "pytz-2020.5-py2.py3-none-any.whl", hash = "sha256:16962c5fb8db4a8f63a26646d8886e9d769b6c511543557bc84e9569fb9a9cb4"}, - {file = "pytz-2020.5.tar.gz", hash = "sha256:180befebb1927b16f6b57101720075a984c019ac16b1b7575673bea42c6c3da5"}, + {file = "pytz-2022.2.1-py2.py3-none-any.whl", hash = "sha256:220f481bdafa09c3955dfbdddb7b57780e9a94f5127e35456a48589b9e0c0197"}, + {file = "pytz-2022.2.1.tar.gz", hash = "sha256:cea221417204f2d1a2aa03ddae3e867921971d0d76f14d87abb4414415bbdcf5"}, ] pytzdata = [ {file = "pytzdata-2020.1-py2.py3-none-any.whl", hash = "sha256:e1e14750bcf95016381e4d472bad004eef710f2d6417240904070b3d6654485f"}, @@ -1986,13 +2000,6 @@ pyyaml = [ {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, - {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, - {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, - {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, - {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, @@ -2033,23 +2040,67 @@ setuptools = [ {file = "setuptools-65.3.0.tar.gz", hash = "sha256:7732871f4f7fa58fb6bdcaeadb0161b2bd046c85905dbaa066bdcbcc81953b57"}, ] simplejson = [ - {file = "simplejson-3.11.1-cp27-cp27m-win32.whl", hash = "sha256:38c2b563cd03363e7cb2bbba6c20ae4eaafd853a83954c8c8dd345ee391787bf"}, - {file = "simplejson-3.11.1-cp27-cp27m-win_amd64.whl", hash = "sha256:8d73b96a6ee7c81fd49dac7225e3846fd60b54a0b5b93a0aaea04c5a5d2e7bf2"}, - {file = "simplejson-3.11.1-cp33-cp33m-win32.whl", hash = "sha256:7f53ab6a675594f237ce7372c1edf742a6acb158149ed3259c5fffc5b613dc94"}, - {file = "simplejson-3.11.1-cp33-cp33m-win_amd64.whl", hash = "sha256:86aa9fd492230c4b8b6814fcf089b36ffba2cec4d0635c8c642135b9067ebbd7"}, - {file = "simplejson-3.11.1-cp34-cp34m-win32.whl", hash = "sha256:7df76ae6cac4a62ad5295f9a9131857077d84cb15fad2011acb2ce7410476009"}, - {file = "simplejson-3.11.1-cp34-cp34m-win_amd64.whl", hash = "sha256:a6939199c30b78ae31e62e6913f0e12cb71a4a5ad67c259e0a98688df027a5de"}, - {file = "simplejson-3.11.1-cp35-cp35m-win32.whl", hash = "sha256:11d91b88cc1e9645c79f0f6fd2961684249af963e2bbff5a00061ed4bbf55379"}, - {file = "simplejson-3.11.1-cp35-cp35m-win_amd64.whl", hash = "sha256:36b0de42e3a8a51086c339cc803f6ac7a9d1d5254066d680956a195ca12cf0d8"}, - {file = "simplejson-3.11.1.tar.gz", hash = "sha256:01a22d49ddd9a168b136f26cac87d9a335660ce07aa5c630b8e3607d6f4325e7"}, - {file = "simplejson-3.11.1.win-amd64-py2.7.exe", hash = "sha256:1975e6b621fe1c2b9321c56476e8ebe1b851006517c1d67041b378950374694c"}, - {file = "simplejson-3.11.1.win-amd64-py3.3.exe", hash = "sha256:f60f01b16215568a08611eb6a4d61d76c4173c3d69aac9cad593777056c284d5"}, - {file = "simplejson-3.11.1.win-amd64-py3.4.exe", hash = "sha256:6be48181337ac5f5d9f48c9c504f317e245519318992122a05c40e482a721d59"}, - {file = "simplejson-3.11.1.win-amd64-py3.5.exe", hash = "sha256:8ae8cdcbe49e29ddfdae0ab81c1f6c070706d18fcee86371352d0d54b47ad8ec"}, - {file = "simplejson-3.11.1.win32-py2.7.exe", hash = "sha256:ebbd52b59948350ad66205e66b299fcca0e0821ed275c21262c522f4a6cea9d2"}, - {file = "simplejson-3.11.1.win32-py3.3.exe", hash = "sha256:2dc7fb8c0c0ff9483ce31b93b700b1fa60aca9d099e6aca9813f28ff131ccf59"}, - {file = "simplejson-3.11.1.win32-py3.4.exe", hash = "sha256:97cc43ef4cb18a2725f6e26d22b96f8ca50872a195bde32707dcb284f89c1d4d"}, - {file = "simplejson-3.11.1.win32-py3.5.exe", hash = "sha256:c76d55d78dc8b06c96fd08c6cc5e2b0b650799627d3f9ca4ad23f40db72d5f6d"}, + {file = "simplejson-3.17.6-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a89acae02b2975b1f8e4974cb8cdf9bf9f6c91162fb8dec50c259ce700f2770a"}, + {file = "simplejson-3.17.6-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:82ff356ff91be0ab2293fc6d8d262451eb6ac4fd999244c4b5f863e049ba219c"}, + {file = "simplejson-3.17.6-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:0de783e9c2b87bdd75b57efa2b6260c24b94605b5c9843517577d40ee0c3cc8a"}, + {file = "simplejson-3.17.6-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:d24a9e61df7a7787b338a58abfba975414937b609eb6b18973e25f573bc0eeeb"}, + {file = "simplejson-3.17.6-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:e8603e691580487f11306ecb066c76f1f4a8b54fb3bdb23fa40643a059509366"}, + {file = "simplejson-3.17.6-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:9b01e7b00654115965a206e3015f0166674ec1e575198a62a977355597c0bef5"}, + {file = "simplejson-3.17.6-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:37bc0cf0e5599f36072077e56e248f3336917ded1d33d2688624d8ed3cefd7d2"}, + {file = "simplejson-3.17.6-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:cf6e7d5fe2aeb54898df18db1baf479863eae581cce05410f61f6b4188c8ada1"}, + {file = "simplejson-3.17.6-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:bdfc54b4468ed4cd7415928cbe782f4d782722a81aeb0f81e2ddca9932632211"}, + {file = "simplejson-3.17.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:dd16302d39c4d6f4afde80edd0c97d4db643327d355a312762ccd9bd2ca515ed"}, + {file = "simplejson-3.17.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:deac4bdafa19bbb89edfb73b19f7f69a52d0b5bd3bb0c4ad404c1bbfd7b4b7fd"}, + {file = "simplejson-3.17.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8bbdb166e2fb816e43ab034c865147edafe28e1b19c72433147789ac83e2dda"}, + {file = "simplejson-3.17.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7854326920d41c3b5d468154318fe6ba4390cb2410480976787c640707e0180"}, + {file = "simplejson-3.17.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:04e31fa6ac8e326480703fb6ded1488bfa6f1d3f760d32e29dbf66d0838982ce"}, + {file = "simplejson-3.17.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f63600ec06982cdf480899026f4fda622776f5fabed9a869fdb32d72bc17e99a"}, + {file = "simplejson-3.17.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e03c3b8cc7883a54c3f34a6a135c4a17bc9088a33f36796acdb47162791b02f6"}, + {file = "simplejson-3.17.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a2d30d6c1652140181dc6861f564449ad71a45e4f165a6868c27d36745b65d40"}, + {file = "simplejson-3.17.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a1aa6e4cae8e3b8d5321be4f51c5ce77188faf7baa9fe1e78611f93a8eed2882"}, + {file = "simplejson-3.17.6-cp310-cp310-win32.whl", hash = "sha256:97202f939c3ff341fc3fa84d15db86156b1edc669424ba20b0a1fcd4a796a045"}, + {file = "simplejson-3.17.6-cp310-cp310-win_amd64.whl", hash = "sha256:80d3bc9944be1d73e5b1726c3bbfd2628d3d7fe2880711b1eb90b617b9b8ac70"}, + {file = "simplejson-3.17.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9fa621b3c0c05d965882c920347b6593751b7ab20d8fa81e426f1735ca1a9fc7"}, + {file = "simplejson-3.17.6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd2fb11922f58df8528adfca123f6a84748ad17d066007e7ac977720063556bd"}, + {file = "simplejson-3.17.6-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:724c1fe135aa437d5126138d977004d165a3b5e2ee98fc4eb3e7c0ef645e7e27"}, + {file = "simplejson-3.17.6-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4ff4ac6ff3aa8f814ac0f50bf218a2e1a434a17aafad4f0400a57a8cc62ef17f"}, + {file = "simplejson-3.17.6-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:67093a526e42981fdd954868062e56c9b67fdd7e712616cc3265ad0c210ecb51"}, + {file = "simplejson-3.17.6-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:5d6b4af7ad7e4ac515bc6e602e7b79e2204e25dbd10ab3aa2beef3c5a9cad2c7"}, + {file = "simplejson-3.17.6-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:1c9b1ed7ed282b36571638297525f8ef80f34b3e2d600a56f962c6044f24200d"}, + {file = "simplejson-3.17.6-cp36-cp36m-win32.whl", hash = "sha256:632ecbbd2228575e6860c9e49ea3cc5423764d5aa70b92acc4e74096fb434044"}, + {file = "simplejson-3.17.6-cp36-cp36m-win_amd64.whl", hash = "sha256:4c09868ddb86bf79b1feb4e3e7e4a35cd6e61ddb3452b54e20cf296313622566"}, + {file = "simplejson-3.17.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4b6bd8144f15a491c662f06814bd8eaa54b17f26095bb775411f39bacaf66837"}, + {file = "simplejson-3.17.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5decdc78849617917c206b01e9fc1d694fd58caa961be816cb37d3150d613d9a"}, + {file = "simplejson-3.17.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:521877c7bd060470806eb6335926e27453d740ac1958eaf0d8c00911bc5e1802"}, + {file = "simplejson-3.17.6-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:65b998193bd7b0c7ecdfffbc825d808eac66279313cb67d8892bb259c9d91494"}, + {file = "simplejson-3.17.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ac786f6cb7aa10d44e9641c7a7d16d7f6e095b138795cd43503769d4154e0dc2"}, + {file = "simplejson-3.17.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:3ff5b3464e1ce86a8de8c88e61d4836927d5595c2162cab22e96ff551b916e81"}, + {file = "simplejson-3.17.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:69bd56b1d257a91e763256d63606937ae4eb890b18a789b66951c00062afec33"}, + {file = "simplejson-3.17.6-cp37-cp37m-win32.whl", hash = "sha256:b81076552d34c27e5149a40187a8f7e2abb2d3185576a317aaf14aeeedad862a"}, + {file = "simplejson-3.17.6-cp37-cp37m-win_amd64.whl", hash = "sha256:07ecaafc1b1501f275bf5acdee34a4ad33c7c24ede287183ea77a02dc071e0c0"}, + {file = "simplejson-3.17.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:068670af975247acbb9fc3d5393293368cda17026db467bf7a51548ee8f17ee1"}, + {file = "simplejson-3.17.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4d1c135af0c72cb28dd259cf7ba218338f4dc027061262e46fe058b4e6a4c6a3"}, + {file = "simplejson-3.17.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:23fe704da910ff45e72543cbba152821685a889cf00fc58d5c8ee96a9bad5f94"}, + {file = "simplejson-3.17.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f444762fed1bc1fd75187ef14a20ed900c1fbb245d45be9e834b822a0223bc81"}, + {file = "simplejson-3.17.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:681eb4d37c9a9a6eb9b3245a5e89d7f7b2b9895590bb08a20aa598c1eb0a1d9d"}, + {file = "simplejson-3.17.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8e8607d8f6b4f9d46fee11447e334d6ab50e993dd4dbfb22f674616ce20907ab"}, + {file = "simplejson-3.17.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b10556817f09d46d420edd982dd0653940b90151d0576f09143a8e773459f6fe"}, + {file = "simplejson-3.17.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e1ec8a9ee0987d4524ffd6299e778c16cc35fef6d1a2764e609f90962f0b293a"}, + {file = "simplejson-3.17.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0b4126cac7d69ac06ff22efd3e0b3328a4a70624fcd6bca4fc1b4e6d9e2e12bf"}, + {file = "simplejson-3.17.6-cp38-cp38-win32.whl", hash = "sha256:35a49ebef25f1ebdef54262e54ae80904d8692367a9f208cdfbc38dbf649e00a"}, + {file = "simplejson-3.17.6-cp38-cp38-win_amd64.whl", hash = "sha256:743cd768affaa508a21499f4858c5b824ffa2e1394ed94eb85caf47ac0732198"}, + {file = "simplejson-3.17.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:fb62d517a516128bacf08cb6a86ecd39fb06d08e7c4980251f5d5601d29989ba"}, + {file = "simplejson-3.17.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:12133863178a8080a3dccbf5cb2edfab0001bc41e5d6d2446af2a1131105adfe"}, + {file = "simplejson-3.17.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5540fba2d437edaf4aa4fbb80f43f42a8334206ad1ad3b27aef577fd989f20d9"}, + {file = "simplejson-3.17.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d74ee72b5071818a1a5dab47338e87f08a738cb938a3b0653b9e4d959ddd1fd9"}, + {file = "simplejson-3.17.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:28221620f4dcabdeac310846629b976e599a13f59abb21616356a85231ebd6ad"}, + {file = "simplejson-3.17.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b09bc62e5193e31d7f9876220fb429ec13a6a181a24d897b9edfbbdbcd678851"}, + {file = "simplejson-3.17.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7255a37ff50593c9b2f1afa8fafd6ef5763213c1ed5a9e2c6f5b9cc925ab979f"}, + {file = "simplejson-3.17.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:401d40969cee3df7bda211e57b903a534561b77a7ade0dd622a8d1a31eaa8ba7"}, + {file = "simplejson-3.17.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a649d0f66029c7eb67042b15374bd93a26aae202591d9afd71e111dd0006b198"}, + {file = "simplejson-3.17.6-cp39-cp39-win32.whl", hash = "sha256:522fad7be85de57430d6d287c4b635813932946ebf41b913fe7e880d154ade2e"}, + {file = "simplejson-3.17.6-cp39-cp39-win_amd64.whl", hash = "sha256:3fe87570168b2ae018391e2b43fbf66e8593a86feccb4b0500d134c998983ccc"}, + {file = "simplejson-3.17.6.tar.gz", hash = "sha256:cf98038d2abf63a1ada5730e91e84c642ba6c225b0198c3684151b1f80c5f8a6"}, ] six = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, @@ -2197,10 +2248,18 @@ types-python-dateutil = [ {file = "types-python-dateutil-2.8.19.tar.gz", hash = "sha256:bfd3eb39c7253aea4ba23b10f69b017d30b013662bb4be4ab48b20bbd763f309"}, {file = "types_python_dateutil-2.8.19-py3-none-any.whl", hash = "sha256:6284df1e4783d8fc6e587f0317a81333856b872a6669a282f8a325342bce7fa8"}, ] +types-pytz = [ + {file = "types-pytz-2022.2.1.0.tar.gz", hash = "sha256:47cfb19c52b9f75896440541db392fd312a35b279c6307a531db71152ea63e2b"}, + {file = "types_pytz-2022.2.1.0-py3-none-any.whl", hash = "sha256:50ead2254b524a3d4153bc65d00289b66898060d2938e586170dce918dbaf3b3"}, +] types-requests = [ {file = "types-requests-2.28.10.tar.gz", hash = "sha256:97d8f40aa1ffe1e58c3726c77d63c182daea9a72d9f1fa2cafdea756b2a19f2c"}, {file = "types_requests-2.28.10-py3-none-any.whl", hash = "sha256:45b485725ed58752f2b23461252f1c1ad9205b884a1e35f786bb295525a3e16a"}, ] +types-simplejson = [ + {file = "types-simplejson-3.17.7.tar.gz", hash = "sha256:801cfa251ef54b707f16256546f84f4e29156177875838c8f33e2c1527f62b47"}, + {file = "types_simplejson-3.17.7-py3-none-any.whl", hash = "sha256:d0efa5696274973030c39a336674a21e16cf2838a85aef3181d4d9f511b2eeef"}, +] types-urllib3 = [ {file = "types-urllib3-1.26.23.tar.gz", hash = "sha256:b78e819f0e350221d0689a5666162e467ba3910737bafda14b5c2c85e9bb1e56"}, {file = "types_urllib3-1.26.23-py3-none-any.whl", hash = "sha256:333e675b188a1c1fd980b4b352f9e40572413a4c1ac689c23cd546e96310070a"}, diff --git a/pyproject.toml b/pyproject.toml index 1d4f4ab2c..0ea9be559 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,6 @@ license = "Apache 2.0" [tool.poetry.dependencies] python = "<3.11,>=3.7.1" -pipelinewise-singer-python = "1.2.0" backoff = ">=1.8.0,<2.0" pendulum = "^2.1.0" click = "~=8.0" @@ -52,6 +51,9 @@ inflection = "^0.5.1" sqlalchemy = "^1.4" python-dotenv = ">=0.20,<0.22" typing-extensions = "^4.2.0" +simplejson = "^3.17.6" +jsonschema = "^4.16.0" +pytz = "^2022.2.1" # Sphinx dependencies installed as optional 'docs' extras # https://github.com/readthedocs/readthedocs.org/issues/4912#issuecomment-664002569 @@ -84,7 +86,9 @@ viztracer = "^0.15.4" requests-mock = "^1.10.0" sqlalchemy2-stubs = {version = "^0.0.2a27", allow-prereleases = true} types-python-dateutil = "^2.8.19" +types-pytz = "^2022.2.1.0" types-requests = "^2.28.10" +types-simplejson = "^3.17.7" coverage = {extras = ["toml"], version = "^6.4"} # Cookiecutter tests @@ -149,7 +153,7 @@ exclude_lines = [ "if __name__ == .__main__.:", '''class .*\bProtocol\):''', '''@(abc\.)?abstractmethod''', - "if TYPE_CHECKING:", + '''if (t\.)?TYPE_CHECKING:''', ] fail_under = 82 diff --git a/samples/sample_mapper/mapper.py b/samples/sample_mapper/mapper.py index e9fbf850e..2ad133383 100644 --- a/samples/sample_mapper/mapper.py +++ b/samples/sample_mapper/mapper.py @@ -3,8 +3,7 @@ from pathlib import PurePath from typing import Generator, List, Optional, Union -import singer - +import singer_sdk._singerlib as singer import singer_sdk.typing as th from singer_sdk.helpers._util import utc_now from singer_sdk.mapper import PluginMapper diff --git a/singer_sdk/_singerlib/__init__.py b/singer_sdk/_singerlib/__init__.py new file mode 100644 index 000000000..bbf243df3 --- /dev/null +++ b/singer_sdk/_singerlib/__init__.py @@ -0,0 +1,19 @@ +from singer_sdk._singerlib.catalog import ( + Catalog, + CatalogEntry, + Metadata, + MetadataMapping, + SelectionMask, + StreamMetadata, +) +from singer_sdk._singerlib.messages import ( + ActivateVersionMessage, + Message, + RecordMessage, + SchemaMessage, + SingerMessageType, + StateMessage, + exclude_null_dict, + write_message, +) +from singer_sdk._singerlib.schema import Schema diff --git a/singer_sdk/helpers/_singer.py b/singer_sdk/_singerlib/catalog.py similarity index 58% rename from singer_sdk/helpers/_singer.py rename to singer_sdk/_singerlib/catalog.py index c756f84a6..89dd789c5 100644 --- a/singer_sdk/helpers/_singer.py +++ b/singer_sdk/_singerlib/catalog.py @@ -2,34 +2,21 @@ import enum import logging +import typing as t from dataclasses import dataclass, fields -from typing import TYPE_CHECKING, Any, Dict, Iterable, Tuple, Union, cast -from singer.catalog import Catalog as BaseCatalog -from singer.catalog import CatalogEntry as BaseCatalogEntry +from singer_sdk._singerlib.schema import Schema -from singer_sdk.helpers._schema import SchemaPlus - -if TYPE_CHECKING: +if t.TYPE_CHECKING: from typing_extensions import TypeAlias -Breadcrumb = Tuple[str, ...] +Breadcrumb = t.Tuple[str, ...] logger = logging.getLogger(__name__) -class SingerMessageType(str, enum.Enum): - """Singer specification message types.""" - - RECORD = "RECORD" - SCHEMA = "SCHEMA" - STATE = "STATE" - ACTIVATE_VERSION = "ACTIVATE_VERSION" - BATCH = "BATCH" - - -class SelectionMask(Dict[Breadcrumb, bool]): +class SelectionMask(t.Dict[Breadcrumb, bool]): """Boolean mask for property selection in schemas and records.""" def __missing__(self, breadcrumb: Breadcrumb) -> bool: @@ -37,6 +24,12 @@ def __missing__(self, breadcrumb: Breadcrumb) -> bool: - Properties default to parent value if available. - Root (stream) defaults to True. + + Args: + breadcrumb: Breadcrumb to check. + + Returns: + True if the breadcrumb is selected, False otherwise. """ if len(breadcrumb) >= 2: parent = breadcrumb[:-2] @@ -61,8 +54,15 @@ class InclusionType(str, enum.Enum): selected_by_default: bool | None = None @classmethod - def from_dict(cls, value: dict[str, Any]): - """Parse metadata dictionary.""" + def from_dict(cls: t.Type[Metadata], value: dict[str, t.Any]) -> Metadata: + """Parse metadata dictionary. + + Args: + value: Metadata dictionary. + + Returns: + Metadata object. + """ return cls( **{ object_field.name: value.get(object_field.name.replace("_", "-")) @@ -70,8 +70,12 @@ def from_dict(cls, value: dict[str, Any]): } ) - def to_dict(self) -> dict[str, Any]: - """Convert metadata to a JSON-encodeable dictionary.""" + def to_dict(self) -> dict[str, t.Any]: + """Convert metadata to a JSON-encodeable dictionary. + + Returns: + Metadata object. + """ result = {} for object_field in fields(self): @@ -92,16 +96,26 @@ class StreamMetadata(Metadata): schema_name: str | None = None -AnyMetadata: TypeAlias = Union[Metadata, StreamMetadata] +AnyMetadata: TypeAlias = t.Union[Metadata, StreamMetadata] -class MetadataMapping(Dict[Breadcrumb, AnyMetadata]): +class MetadataMapping(t.Dict[Breadcrumb, AnyMetadata]): """Stream metadata mapping.""" @classmethod - def from_iterable(cls, iterable: Iterable[dict[str, Any]]): - """Create a metadata mapping from an iterable of metadata dictionaries.""" - mapping: dict[Breadcrumb, AnyMetadata] = cls() + def from_iterable( + cls: t.Type[MetadataMapping], + iterable: t.Iterable[dict[str, t.Any]], + ) -> MetadataMapping: + """Create a metadata mapping from an iterable of metadata dictionaries. + + Args: + iterable: t.Iterable of metadata dictionaries. + + Returns: + Metadata mapping. + """ + mapping = cls() for d in iterable: breadcrumb = tuple(d["breadcrumb"]) metadata = d["metadata"] @@ -112,33 +126,58 @@ def from_iterable(cls, iterable: Iterable[dict[str, Any]]): return mapping - def to_list(self) -> list[dict[str, Any]]: - """Convert mapping to a JSON-encodable list.""" + def to_list(self) -> list[dict[str, t.Any]]: + """Convert mapping to a JSON-encodable list. + + Returns: + List of metadata dictionaries. + """ return [ {"breadcrumb": list(k), "metadata": v.to_dict()} for k, v in self.items() ] - def __missing__(self, breadcrumb: Breadcrumb): - """Handle missing metadata entries.""" + def __missing__(self, breadcrumb: Breadcrumb) -> AnyMetadata: + """Handle missing metadata entries. + + Args: + breadcrumb: Breadcrumb to check. + + Returns: + Metadata object. + """ self[breadcrumb] = Metadata() if breadcrumb else StreamMetadata() return self[breadcrumb] @property - def root(self): - """Get stream (root) metadata from this mapping.""" - meta: StreamMetadata = self[()] - return meta + def root(self) -> StreamMetadata: + """Get stream (root) metadata from this mapping. + + Returns: + Stream metadata. + """ + return self[()] # type: ignore @classmethod def get_standard_metadata( - cls, - schema: dict[str, Any] | None = None, + cls: t.Type[MetadataMapping], + schema: dict[str, t.Any] | None = None, schema_name: str | None = None, key_properties: list[str] | None = None, valid_replication_keys: list[str] | None = None, replication_method: str | None = None, - ): - """Get default metadata for a stream.""" + ) -> MetadataMapping: + """Get default metadata for a stream. + + Args: + schema: Stream schema. + schema_name: Stream schema name. + key_properties: Stream key properties. + valid_replication_keys: Stream valid replication keys. + replication_method: Stream replication method. + + Returns: + Metadata mapping. + """ mapping = cls() root = StreamMetadata( table_key_properties=key_properties, @@ -165,7 +204,11 @@ def get_standard_metadata( return mapping def resolve_selection(self) -> SelectionMask: - """Resolve selection for metadata breadcrumbs and store them in a mapping.""" + """Resolve selection for metadata breadcrumbs and store them in a mapping. + + Returns: + Selection mask. + """ return SelectionMask( (breadcrumb, self._breadcrumb_is_selected(breadcrumb)) for breadcrumb in self @@ -176,6 +219,12 @@ def _breadcrumb_is_selected(self, breadcrumb: Breadcrumb) -> bool: An empty breadcrumb (empty tuple) indicates the stream itself. Otherwise, the breadcrumb is the path to a property within the stream. + + Args: + breadcrumb: Breadcrumb to check. + + Returns: + True if the breadcrumb is selected, False otherwise. """ if not self: # Default to true if no metadata to say otherwise @@ -225,12 +274,12 @@ def _breadcrumb_is_selected(self, breadcrumb: Breadcrumb) -> bool: @dataclass -class CatalogEntry(BaseCatalogEntry): +class CatalogEntry: """Singer catalog entry.""" tap_stream_id: str metadata: MetadataMapping - schema: SchemaPlus + schema: Schema stream: str | None = None key_properties: list[str] | None = None replication_key: str | None = None @@ -242,8 +291,15 @@ class CatalogEntry(BaseCatalogEntry): replication_method: str | None = None @classmethod - def from_dict(cls, stream: dict[str, Any]): - """Create a catalog entry from a dictionary.""" + def from_dict(cls: t.Type[CatalogEntry], stream: dict[str, t.Any]) -> CatalogEntry: + """Create a catalog entry from a dictionary. + + Args: + stream: A dictionary with the defined catalog stream. + + Returns: + A catalog entry. + """ return cls( tap_stream_id=stream["tap_stream_id"], stream=stream.get("stream"), @@ -251,49 +307,103 @@ def from_dict(cls, stream: dict[str, Any]): key_properties=stream.get("key_properties"), database=stream.get("database_name"), table=stream.get("table_name"), - schema=SchemaPlus.from_dict(stream.get("schema", {})), + schema=Schema.from_dict(stream.get("schema", {})), is_view=stream.get("is_view"), + row_count=stream.get("row_count"), stream_alias=stream.get("stream_alias"), metadata=MetadataMapping.from_iterable(stream.get("metadata", [])), replication_method=stream.get("replication_method"), ) - def to_dict(self): - """Convert entry to a dictionary.""" - d = super().to_dict() - d["metadata"] = self.metadata.to_list() - return d + def to_dict(self) -> dict[str, t.Any]: + """Convert entry to a dictionary. + + Returns: + A dictionary representation of the catalog entry. + """ + result: dict[str, t.Any] = {} + if self.tap_stream_id: + result["tap_stream_id"] = self.tap_stream_id + if self.database: + result["database_name"] = self.database + if self.table: + result["table_name"] = self.table + if self.replication_key is not None: + result["replication_key"] = self.replication_key + if self.replication_method is not None: + result["replication_method"] = self.replication_method + if self.key_properties is not None: + result["key_properties"] = self.key_properties + if self.schema is not None: + schema = self.schema.to_dict() # pylint: disable=no-member + result["schema"] = schema + if self.is_view is not None: + result["is_view"] = self.is_view + if self.stream is not None: + result["stream"] = self.stream + if self.row_count is not None: + result["row_count"] = self.row_count + if self.stream_alias is not None: + result["stream_alias"] = self.stream_alias + if self.metadata is not None: + result["metadata"] = self.metadata.to_list() + return result -class Catalog(Dict[str, CatalogEntry], BaseCatalog): +class Catalog(t.Dict[str, CatalogEntry]): """Singer catalog mapping of stream entries.""" @classmethod - def from_dict(cls, data: dict[str, list[dict[str, Any]]]) -> Catalog: - """Create a catalog from a dictionary.""" + def from_dict( + cls: t.Type[Catalog], + data: dict[str, list[dict[str, t.Any]]], + ) -> Catalog: + """Create a catalog from a dictionary. + + Args: + data: A dictionary with the defined catalog streams. + + Returns: + A catalog. + """ instance = cls() for stream in data.get("streams", []): entry = CatalogEntry.from_dict(stream) instance[entry.tap_stream_id] = entry return instance - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> dict[str, t.Any]: """Return a dictionary representation of the catalog. Returns: A dictionary with the defined catalog streams. """ - return cast(Dict[str, Any], super().to_dict()) + return {"streams": [stream.to_dict() for stream in self.streams]} @property def streams(self) -> list[CatalogEntry]: - """Get catalog entries.""" + """Get catalog entries. + + Returns: + A list of catalog entries. + """ return list(self.values()) def add_stream(self, entry: CatalogEntry) -> None: - """Add a stream entry to the catalog.""" + """Add a stream entry to the catalog. + + Args: + entry: The stream entry to add. + """ self[entry.tap_stream_id] = entry def get_stream(self, stream_id: str) -> CatalogEntry | None: - """Retrieve a stream entry from the catalog.""" + """Retrieve a stream entry from the catalog. + + Args: + stream_id: The tap stream id of the stream to retrieve. + + Returns: + The stream entry if found, otherwise None. + """ return self.get(stream_id) diff --git a/singer_sdk/_singerlib/messages.py b/singer_sdk/_singerlib/messages.py new file mode 100644 index 000000000..36c76f6bc --- /dev/null +++ b/singer_sdk/_singerlib/messages.py @@ -0,0 +1,175 @@ +"""Singer message types and utilities.""" + +from __future__ import annotations + +import enum +import sys +import typing as t +from dataclasses import asdict, dataclass, field +from datetime import datetime + +import pytz +import simplejson as json + + +class SingerMessageType(str, enum.Enum): + """Singer specification message types.""" + + RECORD = "RECORD" + SCHEMA = "SCHEMA" + STATE = "STATE" + ACTIVATE_VERSION = "ACTIVATE_VERSION" + BATCH = "BATCH" + + +def exclude_null_dict(pairs: list[tuple[str, t.Any]]) -> dict[str, t.Any]: + """Exclude null values from a dictionary. + + Args: + pairs: The dictionary key-value pairs. + + Returns: + The filtered key-value pairs. + """ + return {key: value for key, value in pairs if value is not None} + + +@dataclass +class Message: + """Singer base message.""" + + type: SingerMessageType = field(init=False) + """The message type.""" + + def to_dict(self) -> dict[str, t.Any]: + """Return a dictionary representation of the message. + + Returns: + A dictionary with the defined message fields. + """ + return asdict(self, dict_factory=exclude_null_dict) + + @classmethod + def from_dict(cls: t.Type[Message], data: dict[str, t.Any]) -> Message: + """Create an encoding from a dictionary. + + Args: + data: The dictionary to create the message from. + + Returns: + The created message. + """ + data.pop("type") + return cls(**data) + + +@dataclass +class RecordMessage(Message): + """Singer record message.""" + + stream: str + """The stream name.""" + + record: dict[str, t.Any] + """The record data.""" + + version: int | None = None + """The record version.""" + + time_extracted: datetime | None = None + """The time the record was extracted.""" + + def __post_init__(self) -> None: + """Post-init processing. + + Raises: + ValueError: If the time_extracted is not timezone-aware. + """ + self.type = SingerMessageType.RECORD + if self.time_extracted and not self.time_extracted.tzinfo: + raise ValueError( + "'time_extracted' must be either None " + + "or an aware datetime (with a time zone)" + ) + + if self.time_extracted: + self.time_extracted = self.time_extracted.astimezone(pytz.utc) + + +@dataclass +class SchemaMessage(Message): + """Singer schema message.""" + + stream: str + """The stream name.""" + + schema: dict[str, t.Any] + """The schema definition.""" + + key_properties: list[str] | None = None + """The key properties.""" + + bookmark_properties: list[str] | None = None + """The bookmark properties.""" + + def __post_init__(self) -> None: + """Post-init processing. + + Raises: + ValueError: If bookmark_properties is not a string or list of strings. + """ + self.type = SingerMessageType.SCHEMA + + if isinstance(self.bookmark_properties, (str, bytes)): + self.bookmark_properties = [self.bookmark_properties] + if self.bookmark_properties and not isinstance(self.bookmark_properties, list): + raise ValueError("bookmark_properties must be a string or list of strings") + + +@dataclass +class StateMessage(Message): + """Singer state message.""" + + value: dict[str, t.Any] + """The state value.""" + + def __post_init__(self) -> None: + """Post-init processing.""" + self.type = SingerMessageType.STATE + + +@dataclass +class ActivateVersionMessage(Message): + """Singer activate version message.""" + + stream: str + """The stream name.""" + + version: int + """The version to activate.""" + + def __post_init__(self) -> None: + """Post-init processing.""" + self.type = SingerMessageType.ACTIVATE_VERSION + + +def format_message(message: Message) -> str: + """Format a message as a JSON string. + + Args: + message: The message to format. + + Returns: + The formatted message. + """ + return json.dumps(message.to_dict(), use_decimal=True, default=str) + + +def write_message(message: Message) -> None: + """Write a message to stdout. + + Args: + message: The message to write. + """ + sys.stdout.write(format_message(message) + "\n") + sys.stdout.flush() diff --git a/singer_sdk/helpers/_schema.py b/singer_sdk/_singerlib/schema.py similarity index 59% rename from singer_sdk/helpers/_schema.py rename to singer_sdk/_singerlib/schema.py index 742c6c34f..09f9adc4f 100644 --- a/singer_sdk/helpers/_schema.py +++ b/singer_sdk/_singerlib/schema.py @@ -1,9 +1,9 @@ """Provides an object model for JSON Schema.""" -from dataclasses import dataclass -from typing import Any, List, Optional, Union +from __future__ import annotations -from singer import Schema +import typing as t +from dataclasses import dataclass # These are keys defined in the JSON Schema spec that do not themselves contain # schemas (or lists of schemas) @@ -30,7 +30,7 @@ @dataclass -class SchemaPlus(Schema): +class Schema: """Object model for JSON Schema. Tap and Target authors may find this to be more convenient than @@ -39,30 +39,33 @@ class SchemaPlus(Schema): This is based on, and overwrites https://github.com/transferwise/pipelinewise-singer-python/blob/master/singer/schema.py. This is because we wanted to expand it with extra STANDARD_KEYS. - """ - type: Optional[Union[str, List[str]]] = None - properties: Optional[dict] = None - items: Optional[Any] = None - description: Optional[str] = None - minimum: Optional[float] = None - maximum: Optional[float] = None - exclusiveMinimum: Optional[float] = None - exclusiveMaximum: Optional[float] = None - multipleOf: Optional[float] = None - maxLength: Optional[int] = None - minLength: Optional[int] = None - anyOf: Optional[Any] = None - format: Optional[str] = None - additionalProperties: Optional[Any] = None - patternProperties: Optional[Any] = None - required: Optional[List[str]] = None - enum: Optional[List[Any]] = None - title: Optional[str] = None - - def to_dict(self): - """Return the raw JSON Schema as a (possibly nested) dict.""" + type: str | list[str] | None = None + properties: dict | None = None + items: t.Any | None = None + description: str | None = None + minimum: float | None = None + maximum: float | None = None + exclusiveMinimum: float | None = None + exclusiveMaximum: float | None = None + multipleOf: float | None = None + maxLength: int | None = None + minLength: int | None = None + anyOf: t.Any | None = None + format: str | None = None + additionalProperties: t.Any | None = None + patternProperties: t.Any | None = None + required: list[str] | None = None + enum: list[t.Any] | None = None + title: str | None = None + + def to_dict(self) -> dict[str, t.Any]: + """Return the raw JSON Schema as a (possibly nested) dict. + + Returns: + The raw JSON Schema as a (possibly nested) dict. + """ result = {} if self.properties is not None: @@ -78,10 +81,15 @@ def to_dict(self): return result @classmethod - def from_dict(cls, data, **schema_defaults): + def from_dict(cls: t.Type[Schema], data: dict, **schema_defaults: t.Any) -> Schema: """Initialize a Schema object based on the JSON Schema structure. - :param schema_defaults: The default values to the Schema constructor. + Args: + data: The JSON Schema structure. + schema_defaults: Default values for the schema. + + Returns: + The initialized Schema object. """ kwargs = schema_defaults.copy() properties = data.get("properties") @@ -89,12 +97,11 @@ def from_dict(cls, data, **schema_defaults): if properties is not None: kwargs["properties"] = { - k: SchemaPlus.from_dict(v, **schema_defaults) - for k, v in properties.items() + k: cls.from_dict(v, **schema_defaults) for k, v in properties.items() } if items is not None: - kwargs["items"] = SchemaPlus.from_dict(items, **schema_defaults) + kwargs["items"] = cls.from_dict(items, **schema_defaults) for key in STANDARD_KEYS: if key in data: kwargs[key] = data[key] - return SchemaPlus(**kwargs) + return cls(**kwargs) diff --git a/singer_sdk/authenticators.py b/singer_sdk/authenticators.py index 468b607e8..e1b948abb 100644 --- a/singer_sdk/authenticators.py +++ b/singer_sdk/authenticators.py @@ -13,7 +13,6 @@ import requests from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import serialization -from singer import utils from singer_sdk.helpers._util import utc_now from singer_sdk.streams import Stream as RESTStreamBase @@ -426,7 +425,7 @@ def is_token_valid(self) -> bool: return False if not self.expires_in: return True - if self.expires_in > (utils.now() - self.last_refreshed).total_seconds(): + if self.expires_in > (utc_now() - self.last_refreshed).total_seconds(): return True return False diff --git a/singer_sdk/helpers/_batch.py b/singer_sdk/helpers/_batch.py index 161e0097c..5bac888ba 100644 --- a/singer_sdk/helpers/_batch.py +++ b/singer_sdk/helpers/_batch.py @@ -3,25 +3,18 @@ from __future__ import annotations import enum -import sys from contextlib import contextmanager from dataclasses import asdict, dataclass, field from typing import IO, TYPE_CHECKING, Any, ClassVar, Generator from urllib.parse import ParseResult, parse_qs, urlencode, urlparse import fs -from singer.messages import Message -from singer_sdk.helpers._singer import SingerMessageType +from singer_sdk._singerlib.messages import Message, SingerMessageType if TYPE_CHECKING: from fs.base import FS - if sys.version_info >= (3, 8): - from typing import Literal - else: - from typing_extensions import Literal - class BatchFileFormat(str, enum.Enum): """Batch file format.""" @@ -77,9 +70,6 @@ class JSONLinesEncoding(BaseBatchFileEncoding): class SDKBatchMessage(Message): """Singer batch message in the Meltano SDK flavor.""" - type: Literal[SingerMessageType.BATCH] = field(init=False) - """The message type.""" - stream: str """The stream name.""" @@ -95,27 +85,6 @@ def __post_init__(self): self.type = SingerMessageType.BATCH - def asdict(self): - """Return a dictionary representation of the message. - - Returns: - A dictionary with the defined message fields. - """ - return asdict(self) - - @classmethod - def from_dict(cls, data: dict[str, Any]) -> SDKBatchMessage: - """Create an encoding from a dictionary. - - Args: - data: The dictionary to create the message from. - - Returns: - The created message. - """ - data.pop("type") - return cls(**data) - @dataclass class StorageTarget: diff --git a/singer_sdk/helpers/_catalog.py b/singer_sdk/helpers/_catalog.py index df7493b4d..70a883157 100644 --- a/singer_sdk/helpers/_catalog.py +++ b/singer_sdk/helpers/_catalog.py @@ -6,7 +6,7 @@ from memoization import cached -from singer_sdk.helpers._singer import Catalog, SelectionMask +from singer_sdk._singerlib import Catalog, SelectionMask from singer_sdk.helpers._typing import is_object_type _MAX_LRU_CACHE = 500 diff --git a/singer_sdk/io_base.py b/singer_sdk/io_base.py index 32f48ae36..c919dffe4 100644 --- a/singer_sdk/io_base.py +++ b/singer_sdk/io_base.py @@ -10,8 +10,8 @@ from typing import IO from typing import Counter as CounterType +from singer_sdk._singerlib import SingerMessageType from singer_sdk.helpers._compat import final -from singer_sdk.helpers._singer import SingerMessageType logger = logging.getLogger(__name__) diff --git a/singer_sdk/mapper.py b/singer_sdk/mapper.py index c94a2d34d..48918def2 100644 --- a/singer_sdk/mapper.py +++ b/singer_sdk/mapper.py @@ -11,6 +11,7 @@ import logging from typing import Any, Callable +from singer_sdk._singerlib.catalog import Catalog from singer_sdk.exceptions import MapExpressionError, StreamMapConfigError from singer_sdk.helpers import _simpleeval as simpleeval from singer_sdk.helpers._catalog import get_selected_schema @@ -20,7 +21,6 @@ flatten_schema, get_flattening_options, ) -from singer_sdk.helpers._singer import Catalog from singer_sdk.typing import ( CustomType, IntegerType, diff --git a/singer_sdk/mapper_base.py b/singer_sdk/mapper_base.py index c77a88966..0f7998a5d 100644 --- a/singer_sdk/mapper_base.py +++ b/singer_sdk/mapper_base.py @@ -5,8 +5,8 @@ from typing import Callable, Iterable, List, Tuple import click -import singer +import singer_sdk._singerlib as singer from singer_sdk.cli import common_options from singer_sdk.configuration._dict_config import merge_config_sources from singer_sdk.helpers._classproperty import classproperty diff --git a/singer_sdk/streams/core.py b/singer_sdk/streams/core.py index bcc003029..9c7d2e2d6 100644 --- a/singer_sdk/streams/core.py +++ b/singer_sdk/streams/core.py @@ -17,9 +17,8 @@ import pendulum import requests -import singer -from singer import RecordMessage, Schema, SchemaMessage, StateMessage +import singer_sdk._singerlib as singer from singer_sdk.exceptions import InvalidStreamSortException, MaxRecordsLimitException from singer_sdk.helpers._batch import ( BaseBatchFileEncoding, @@ -29,13 +28,6 @@ from singer_sdk.helpers._catalog import pop_deselected_record_properties from singer_sdk.helpers._compat import final from singer_sdk.helpers._flattening import get_flattening_options -from singer_sdk.helpers._schema import SchemaPlus -from singer_sdk.helpers._singer import ( - Catalog, - CatalogEntry, - MetadataMapping, - SelectionMask, -) from singer_sdk.helpers._state import ( finalize_state_progress_markers, get_starting_replication_value, @@ -104,7 +96,7 @@ class Stream(metaclass=abc.ABCMeta): def __init__( self, tap: TapBaseClass, - schema: str | PathLike | dict[str, Any] | Schema | None = None, + schema: str | PathLike | dict[str, Any] | singer.Schema | None = None, name: str | None = None, ) -> None: """Init tap stream. @@ -128,15 +120,15 @@ def __init__( self._config: dict = dict(tap.config) self._tap = tap self._tap_state = tap.state - self._tap_input_catalog: Catalog | None = None + self._tap_input_catalog: singer.Catalog | None = None self._stream_maps: list[StreamMap] | None = None self.forced_replication_method: str | None = None self._replication_key: str | None = None self._primary_keys: list[str] | None = None self._state_partitioning_keys: list[str] | None = None self._schema_filepath: Path | None = None - self._metadata: MetadataMapping | None = None - self._mask: SelectionMask | None = None + self._metadata: singer.MetadataMapping | None = None + self._mask: singer.SelectionMask | None = None self._schema: dict self.child_streams: list[Stream] = [] if schema: @@ -149,7 +141,7 @@ def __init__( self._schema_filepath = Path(schema) elif isinstance(schema, dict): self._schema = schema - elif isinstance(schema, Schema): + elif isinstance(schema, singer.Schema): self._schema = schema.to_dict() else: raise ValueError( @@ -503,7 +495,7 @@ def check_sorted(self) -> bool: return True @property - def metadata(self) -> MetadataMapping: + def metadata(self) -> singer.MetadataMapping: """Get stream metadata. Metadata attributes (`inclusion`, `selected`, etc.) are part of the Singer spec. @@ -522,7 +514,7 @@ def metadata(self) -> MetadataMapping: self._metadata = catalog_entry.metadata return self._metadata - self._metadata = MetadataMapping.get_standard_metadata( + self._metadata = singer.MetadataMapping.get_standard_metadata( schema=self.schema, replication_method=self.forced_replication_method, key_properties=self.primary_keys or [], @@ -539,16 +531,16 @@ def metadata(self) -> MetadataMapping: return self._metadata @property - def _singer_catalog_entry(self) -> CatalogEntry: + def _singer_catalog_entry(self) -> singer.CatalogEntry: """Return catalog entry as specified by the Singer catalog spec. Returns: TODO """ - return CatalogEntry( + return singer.CatalogEntry( tap_stream_id=self.tap_stream_id, stream=self.name, - schema=SchemaPlus.from_dict(self.schema), + schema=singer.Schema.from_dict(self.schema), metadata=self.metadata, key_properties=self.primary_keys or [], replication_key=self.replication_key, @@ -561,13 +553,13 @@ def _singer_catalog_entry(self) -> CatalogEntry: ) @property - def _singer_catalog(self) -> Catalog: + def _singer_catalog(self) -> singer.Catalog: """TODO. Returns: TODO """ - return Catalog([(self.tap_stream_id, self._singer_catalog_entry)]) + return singer.Catalog([(self.tap_stream_id, self._singer_catalog_entry)]) @property def config(self) -> Mapping[str, Any]: @@ -741,9 +733,9 @@ def _increment_stream_state( def _write_state_message(self) -> None: """Write out a STATE message with the latest state.""" - singer.write_message(StateMessage(value=self.tap_state)) + singer.write_message(singer.StateMessage(value=self.tap_state)) - def _generate_schema_messages(self) -> Generator[SchemaMessage, None, None]: + def _generate_schema_messages(self) -> Generator[singer.SchemaMessage, None, None]: """Generate schema messages from stream maps. Yields: @@ -755,7 +747,7 @@ def _generate_schema_messages(self) -> Generator[SchemaMessage, None, None]: # Don't emit schema if the stream's records are all ignored. continue - schema_message = SchemaMessage( + schema_message = singer.SchemaMessage( stream_map.stream_alias, stream_map.transformed_schema, stream_map.transformed_key_properties, @@ -769,7 +761,7 @@ def _write_schema_message(self) -> None: singer.write_message(schema_message) @property - def mask(self) -> SelectionMask: + def mask(self) -> singer.SelectionMask: """Get a boolean mask for stream and property selection. Returns: @@ -783,7 +775,7 @@ def mask(self) -> SelectionMask: def _generate_record_messages( self, record: dict, - ) -> Generator[RecordMessage, None, None]: + ) -> Generator[singer.RecordMessage, None, None]: """Write out a RECORD message. Args: @@ -803,7 +795,7 @@ def _generate_record_messages( mapped_record = stream_map.transform(record) # Emit record if not filtered if mapped_record is not None: - record_message = RecordMessage( + record_message = singer.RecordMessage( stream=stream_map.stream_alias, record=mapped_record, version=None, @@ -1172,7 +1164,7 @@ def _sync_children(self, child_context: dict) -> None: # Overridable Methods - def apply_catalog(self, catalog: Catalog) -> None: + def apply_catalog(self, catalog: singer.Catalog) -> None: """Apply a catalog dict, updating any settings overridden within the catalog. Developers may override this method in order to introduce advanced catalog diff --git a/singer_sdk/streams/rest.py b/singer_sdk/streams/rest.py index a1b52cb10..11c59e236 100644 --- a/singer_sdk/streams/rest.py +++ b/singer_sdk/streams/rest.py @@ -12,8 +12,8 @@ import backoff import requests -from singer.schema import Schema +from singer_sdk._singerlib import Schema from singer_sdk.authenticators import APIAuthenticatorBase, SimpleAuthenticator from singer_sdk.exceptions import FatalAPIError, RetriableAPIError from singer_sdk.helpers.jsonpath import extract_jsonpath diff --git a/singer_sdk/streams/sql.py b/singer_sdk/streams/sql.py index deb41ea33..83c808cb7 100644 --- a/singer_sdk/streams/sql.py +++ b/singer_sdk/streams/sql.py @@ -13,9 +13,8 @@ from sqlalchemy.engine.reflection import Inspector from singer_sdk import typing as th +from singer_sdk._singerlib import CatalogEntry, MetadataMapping, Schema from singer_sdk.exceptions import ConfigValidationError -from singer_sdk.helpers._schema import SchemaPlus -from singer_sdk.helpers._singer import CatalogEntry, MetadataMapping from singer_sdk.plugin_base import PluginBase as TapBaseClass from singer_sdk.streams.core import Stream @@ -403,7 +402,7 @@ def discover_catalog_entry( stream=unique_stream_id, table=table_name, key_properties=key_properties, - schema=SchemaPlus.from_dict(schema), + schema=Schema.from_dict(schema), is_view=is_view, replication_method=replication_method, metadata=MetadataMapping.get_standard_metadata( diff --git a/singer_sdk/tap_base.py b/singer_sdk/tap_base.py index 75f1b481e..6873cd22e 100644 --- a/singer_sdk/tap_base.py +++ b/singer_sdk/tap_base.py @@ -8,12 +8,12 @@ import click +from singer_sdk._singerlib import Catalog from singer_sdk.cli import common_options from singer_sdk.exceptions import MaxRecordsLimitException from singer_sdk.helpers import _state from singer_sdk.helpers._classproperty import classproperty from singer_sdk.helpers._compat import final -from singer_sdk.helpers._singer import Catalog from singer_sdk.helpers._state import write_stream_state from singer_sdk.helpers._util import read_json_file from singer_sdk.helpers.capabilities import ( @@ -81,7 +81,7 @@ def __init__( if isinstance(catalog, Catalog): self._input_catalog = catalog elif isinstance(catalog, dict): - self._input_catalog = Catalog.from_dict(catalog) + self._input_catalog = Catalog.from_dict(catalog) # type: ignore elif catalog is not None: self._input_catalog = Catalog.from_dict(read_json_file(catalog)) @@ -242,7 +242,7 @@ def _singer_catalog(self) -> Catalog: """Return a Catalog object. Returns: - :class:`singer_sdk.helpers._singer.Catalog`. + :class:`singer_sdk._singerlib.Catalog`. """ return Catalog( (stream.tap_stream_id, stream._singer_catalog_entry) diff --git a/singer_sdk/testing.py b/singer_sdk/testing.py index b0c8a926d..84d575bae 100644 --- a/singer_sdk/testing.py +++ b/singer_sdk/testing.py @@ -4,7 +4,7 @@ from contextlib import redirect_stderr, redirect_stdout from typing import Callable, List, Optional, Tuple, Type, cast -from singer_sdk.helpers import _singer +import singer_sdk._singerlib as singer from singer_sdk.mapper_base import InlineMapper from singer_sdk.tap_base import Tap from singer_sdk.target_base import Target @@ -155,7 +155,7 @@ def _select_all(catalog_dict: dict) -> dict: Returns: dict: [description] """ - catalog = _singer.Catalog.from_dict(catalog_dict) + catalog = singer.Catalog.from_dict(catalog_dict) for catalog_entry in catalog.streams: catalog_entry.metadata.root.selected = True diff --git a/tests/_singerlib/test_catalog.py b/tests/_singerlib/test_catalog.py new file mode 100644 index 000000000..26a64f887 --- /dev/null +++ b/tests/_singerlib/test_catalog.py @@ -0,0 +1,267 @@ +from typing import List, Optional + +import pytest + +from singer_sdk._singerlib import ( + Catalog, + CatalogEntry, + Metadata, + MetadataMapping, + SelectionMask, + StreamMetadata, +) + +METADATA_ARRAY = [ + { + "breadcrumb": [], + "metadata": { + "selected": True, + "forced-replication-method": "FULL_TABLE", + }, + }, + { + "breadcrumb": ["properties", "id"], + "metadata": { + "inclusion": "automatic", + "selected": True, + }, + }, + { + "breadcrumb": ["properties", "updated_at"], + "metadata": { + "inclusion": "automatic", + "selected": False, + }, + }, + { + "breadcrumb": ["properties", "name"], + "metadata": { + "inclusion": "available", + "selected": True, + }, + }, + { + "breadcrumb": ["properties", "an_object"], + "metadata": {"selected": False}, + }, + { + "breadcrumb": ["properties", "an_object", "properties", "nested"], + "metadata": { + "selected": True, + }, + }, + { + "breadcrumb": ["properties", "not_supported_selected"], + "metadata": { + "inclusion": "unsupported", + "selected": True, + }, + }, + { + "breadcrumb": ["properties", "not_supported_not_selected"], + "metadata": { + "inclusion": "unsupported", + "selected": False, + }, + }, + { + "breadcrumb": ["properties", "selected_by_default"], + "metadata": { + "inclusion": "available", + "selected-by-default": True, + }, + }, +] + + +def test_selection_mask(): + mask = SelectionMask( + [ + (("properties", "id"), False), + (("properties", "an_object"), False), + (("properties", "an_object", "properties", "a_string"), True), + ], + ) + # Missing root breadcrumb is selected + assert mask[()] is True + + # Explicitly deselected + assert mask[("properties", "id")] is False + + # Missing defaults to parent selection + assert mask[("properties", "name")] is True + + # Explicitly selected + assert mask[("properties", "an_object")] is False + + # Missing defaults to parent selection + assert mask[("properties", "an_object", "properties", "id")] is False + + # Explicitly selected nested property + assert mask[("properties", "an_object", "properties", "a_string")] is True + + +def test_metadata_mapping(): + mapping = MetadataMapping.from_iterable(METADATA_ARRAY) + + assert ( + mapping[()] + == mapping.root + == StreamMetadata( + selected=True, + forced_replication_method="FULL_TABLE", + ) + ) + assert mapping[("properties", "id")] == Metadata( + inclusion=Metadata.InclusionType.AUTOMATIC, + selected=True, + ) + assert mapping[("properties", "name")] == Metadata( + inclusion=Metadata.InclusionType.AVAILABLE, + selected=True, + ) + assert mapping[("properties", "missing")] == Metadata() + + selection_mask = mapping.resolve_selection() + assert selection_mask[()] is True + assert selection_mask[("properties", "id")] is True + assert selection_mask[("properties", "updated_at")] is True + assert selection_mask[("properties", "name")] is True + assert selection_mask[("properties", "missing")] is True + assert selection_mask[("properties", "an_object")] is False + assert selection_mask[("properties", "an_object", "properties", "nested")] is False + assert selection_mask[("properties", "not_supported_selected")] is False + assert selection_mask[("properties", "not_supported_not_selected")] is False + assert selection_mask[("properties", "selected_by_default")] is True + + +def test_empty_metadata_mapping(): + """Check that empty metadata mapping results in stream being selected.""" + mapping = MetadataMapping() + assert mapping._breadcrumb_is_selected(()) is True + + +def test_catalog_parsing(): + """Validate parsing works for a catalog and its stream entries.""" + catalog_dict = { + "streams": [ + { + "tap_stream_id": "test", + "database_name": "app_db", + "row_count": 10000, + "stream_alias": "test_alias", + "metadata": [ + { + "breadcrumb": [], + "metadata": { + "inclusion": "available", + }, + }, + { + "breadcrumb": ["properties", "a"], + "metadata": { + "inclusion": "unsupported", + }, + }, + ], + "schema": { + "type": "object", + }, + }, + ], + } + catalog = Catalog.from_dict(catalog_dict) + + assert catalog.streams[0].tap_stream_id == "test" + assert catalog.streams[0].database == "app_db" + assert catalog.streams[0].row_count == 10000 + assert catalog.streams[0].stream_alias == "test_alias" + assert catalog.get_stream("test").tap_stream_id == "test" + assert catalog["test"].metadata.to_list() == catalog_dict["streams"][0]["metadata"] + assert catalog["test"].tap_stream_id == catalog_dict["streams"][0]["tap_stream_id"] + assert catalog["test"].schema.to_dict() == {"type": "object"} + assert catalog.to_dict() == catalog_dict + + new = { + "tap_stream_id": "new", + "metadata": [], + "schema": {}, + } + entry = CatalogEntry.from_dict(new) + catalog.add_stream(entry) + assert catalog.get_stream("new") == entry + + +@pytest.mark.parametrize( + "schema,key_properties,replication_method,valid_replication_keys,schema_name", + [ + ( + {"properties": {"id": {"type": "integer"}}, "type": "object"}, + ["id"], + "FULL_TABLE", + None, + None, + ), + ( + { + "properties": { + "first_name": {"type": "string"}, + "last_name": {"type": "string"}, + }, + "type": "object", + }, + ["first_name", "last_name"], + "INCREMENTAL", + ["updated_at"], + "users", + ), + ( + { + "properties": { + "first_name": {"type": "string"}, + "last_name": {"type": "string"}, + "group": {"type": "string"}, + }, + "type": "object", + }, + ["first_name", "last_name"], + "FULL_TABLE", + None, + None, + ), + ( + {}, + [], + None, + None, + None, + ), + ], +) +def test_standard_metadata( + schema: dict, + key_properties: List[str], + replication_method: Optional[str], + valid_replication_keys: Optional[List[str]], + schema_name: Optional[str], +): + """Validate generated metadata.""" + metadata = MetadataMapping.get_standard_metadata( + schema=schema, + schema_name=schema_name, + key_properties=key_properties, + replication_method=replication_method, + valid_replication_keys=valid_replication_keys, + ) + + stream_metadata = metadata[()] + assert stream_metadata.table_key_properties == key_properties + assert stream_metadata.forced_replication_method == replication_method + assert stream_metadata.valid_replication_keys == valid_replication_keys + assert stream_metadata.selected is None + assert stream_metadata.schema_name == schema_name + + for pk in key_properties: + pk_metadata = metadata[("properties", pk)] + assert pk_metadata.inclusion == Metadata.InclusionType.AUTOMATIC + assert pk_metadata.selected is None diff --git a/tests/_singerlib/test_messages.py b/tests/_singerlib/test_messages.py new file mode 100644 index 000000000..b183cbde0 --- /dev/null +++ b/tests/_singerlib/test_messages.py @@ -0,0 +1,139 @@ +import io +from contextlib import redirect_stdout +from datetime import datetime + +import pytest +from pytz import UTC, timezone + +import singer_sdk._singerlib as singer +from singer_sdk._singerlib.messages import format_message + + +def test_exclude_null_dict(): + pairs = [("a", 1), ("b", None), ("c", 3)] + assert singer.exclude_null_dict(pairs) == {"a": 1, "c": 3} + + +def test_format_message(): + message = singer.RecordMessage( + stream="test", + record={"id": 1, "name": "test"}, + ) + assert format_message(message) == ( + '{"type": "RECORD", "stream": "test", "record": {"id": 1, "name": "test"}}' + ) + + +def test_write_message(): + message = singer.RecordMessage( + stream="test", + record={"id": 1, "name": "test"}, + ) + with redirect_stdout(io.StringIO()) as out: + singer.write_message(message) + + assert out.getvalue() == ( + '{"type": "RECORD", "stream": "test", "record": {"id": 1, "name": "test"}}\n' + ) + + +def test_record_message(): + record = singer.RecordMessage( + stream="test", + record={"id": 1, "name": "test"}, + ) + assert record.stream == "test" + assert record.record == {"id": 1, "name": "test"} + assert record.to_dict() == { + "type": "RECORD", + "stream": "test", + "record": {"id": 1, "name": "test"}, + } + + assert singer.RecordMessage.from_dict(record.to_dict()) == record + + +def test_record_message_naive_time_extracted(): + """Check that record message' time_extracted must be timezone-aware.""" + with pytest.raises(ValueError, match="must be either None or an aware datetime"): + singer.RecordMessage( + stream="test", + record={"id": 1, "name": "test"}, + time_extracted=datetime(2021, 1, 1), + ) + + +def test_record_message_time_extracted_to_utc(): + """Check that record message's time_extracted is converted to UTC.""" + naive = datetime(2021, 1, 1, 12) + nairobi = timezone("Africa/Nairobi") + + record = singer.RecordMessage( + stream="test", + record={"id": 1, "name": "test"}, + time_extracted=nairobi.localize(naive), + ) + assert record.time_extracted == datetime(2021, 1, 1, 9, tzinfo=UTC) + + +def test_schema_message(): + schema = singer.SchemaMessage( + stream="test", + schema={"type": "object", "properties": {"id": {"type": "integer"}}}, + ) + assert schema.stream == "test" + assert schema.schema == { + "type": "object", + "properties": {"id": {"type": "integer"}}, + } + assert schema.to_dict() == { + "type": "SCHEMA", + "stream": "test", + "schema": {"type": "object", "properties": {"id": {"type": "integer"}}}, + } + + assert singer.SchemaMessage.from_dict(schema.to_dict()) == schema + + +def test_schema_messages_string_bookmark_properties(): + """Check that schema message's bookmark_properties can be a string.""" + schema = singer.SchemaMessage( + stream="test", + schema={"type": "object", "properties": {"id": {"type": "integer"}}}, + bookmark_properties="id", + ) + assert schema.bookmark_properties == ["id"] + + +def test_bookmark_properties_not_string_or_list(): + """Check that schema message's bookmark_properties must be a string or list.""" + with pytest.raises(ValueError, match="must be a string or list"): + singer.SchemaMessage( + stream="test", + schema={"type": "object", "properties": {"id": {"type": "integer"}}}, + bookmark_properties=1, + ) + + +def test_state_message(): + state = singer.StateMessage(value={"bookmarks": {"test": {"id": 1}}}) + assert state.value == {"bookmarks": {"test": {"id": 1}}} + assert state.to_dict() == { + "type": "STATE", + "value": {"bookmarks": {"test": {"id": 1}}}, + } + + assert singer.StateMessage.from_dict(state.to_dict()) == state + + +def test_activate_version_message(): + version = singer.ActivateVersionMessage(stream="test", version=1) + assert version.stream == "test" + assert version.version == 1 + assert version.to_dict() == { + "type": "ACTIVATE_VERSION", + "stream": "test", + "version": 1, + } + + assert singer.ActivateVersionMessage.from_dict(version.to_dict()) == version diff --git a/tests/_singerlib/test_schema.py b/tests/_singerlib/test_schema.py new file mode 100644 index 000000000..781d163cd --- /dev/null +++ b/tests/_singerlib/test_schema.py @@ -0,0 +1,86 @@ +import pytest + +from singer_sdk._singerlib import Schema + +STRING_SCHEMA = Schema(type="string", maxLength=32) +STRING_DICT = {"type": "string", "maxLength": 32} +INTEGER_SCHEMA = Schema(type="integer", maximum=1000000) +INTEGER_DICT = {"type": "integer", "maximum": 1000000} +ARRAY_SCHEMA = Schema(type="array", items=INTEGER_SCHEMA) +ARRAY_DICT = {"type": "array", "items": INTEGER_DICT} +OBJECT_SCHEMA = Schema( + type="object", + properties={ + "a_string": STRING_SCHEMA, + "an_array": ARRAY_SCHEMA, + }, + additionalProperties=True, + required=["a_string"], +) +OBJECT_DICT = { + "type": "object", + "properties": { + "a_string": STRING_DICT, + "an_array": ARRAY_DICT, + }, + "additionalProperties": True, + "required": ["a_string"], +} + + +@pytest.mark.parametrize( + "schema,expected", + [ + pytest.param( + STRING_SCHEMA, + STRING_DICT, + id="string_to_dict", + ), + pytest.param( + INTEGER_SCHEMA, + INTEGER_DICT, + id="integer_to_dict", + ), + pytest.param( + ARRAY_SCHEMA, + ARRAY_DICT, + id="array_to_dict", + ), + pytest.param( + OBJECT_SCHEMA, + OBJECT_DICT, + id="object_to_dict", + ), + ], +) +def test_schema_to_dict(schema, expected): + assert schema.to_dict() == expected + + +@pytest.mark.parametrize( + "pydict,expected", + [ + pytest.param( + STRING_DICT, + STRING_SCHEMA, + id="schema_from_string_dict", + ), + pytest.param( + INTEGER_DICT, + INTEGER_SCHEMA, + id="schema_from_integer_dict", + ), + pytest.param( + ARRAY_DICT, + ARRAY_SCHEMA, + id="schema_from_array_dict", + ), + pytest.param( + OBJECT_DICT, + OBJECT_SCHEMA, + id="schema_from_object_dict", + ), + ], +) +def test_schema_from_dict(pydict, expected): + assert Schema.from_dict(pydict) == expected diff --git a/tests/core/test_catalog_selection.py b/tests/core/test_catalog_selection.py index 2f203be5d..d5a94b371 100644 --- a/tests/core/test_catalog_selection.py +++ b/tests/core/test_catalog_selection.py @@ -5,12 +5,11 @@ import pytest -import singer_sdk.helpers._singer as singer +import singer_sdk._singerlib as singer from singer_sdk.helpers._catalog import ( get_selected_schema, pop_deselected_record_properties, ) -from singer_sdk.helpers._schema import SchemaPlus from singer_sdk.typing import ObjectType, PropertiesList, Property, StringType @@ -152,7 +151,7 @@ def catalog_entry_obj(schema, stream_name, selection_metadata) -> singer.Catalog return singer.CatalogEntry( tap_stream_id=stream_name, stream=stream_name, - schema=SchemaPlus.from_dict(schema), + schema=singer.Schema.from_dict(schema), metadata=singer.MetadataMapping.from_iterable(selection_metadata), ) diff --git a/tests/core/test_mapper.py b/tests/core/test_mapper.py index 233ae8ae7..ba9e0f8d8 100644 --- a/tests/core/test_mapper.py +++ b/tests/core/test_mapper.py @@ -7,9 +7,9 @@ import pytest +from singer_sdk._singerlib import Catalog from singer_sdk.exceptions import MapExpressionError from singer_sdk.helpers._catalog import get_selected_schema -from singer_sdk.helpers._singer import Catalog from singer_sdk.mapper import PluginMapper, RemoveRecordTransform, md5 from singer_sdk.streams.core import Stream from singer_sdk.tap_base import Tap diff --git a/tests/core/test_schema.py b/tests/core/test_schema.py index 87b7ec03b..2834a91dd 100644 --- a/tests/core/test_schema.py +++ b/tests/core/test_schema.py @@ -1,5 +1,5 @@ """ -Testing that SchemaPlus can convert schemas lossless from and to dicts. +Testing that Schema can convert schemas lossless from and to dicts. Schemas are taken from these examples; https://json-schema.org/learn/miscellaneous-examples.html @@ -29,7 +29,7 @@ 'anyOf' and' patternProperties' as SIMPLE even though they can contain schemas. """ -from singer_sdk.helpers._schema import SchemaPlus +from singer_sdk._singerlib import Schema def test_simple_schema(): @@ -44,10 +44,10 @@ def test_simple_schema(): }, } - schema_plus = SchemaPlus.from_dict(simple_schema) + schema_plus = Schema.from_dict(simple_schema) assert schema_plus.to_dict() == simple_schema assert schema_plus.required == ["latitude", "longitude"] - assert isinstance(schema_plus.properties["latitude"], SchemaPlus) + assert isinstance(schema_plus.properties["latitude"], Schema) latitude = schema_plus.properties["latitude"] assert latitude.type == "number" @@ -58,9 +58,9 @@ def test_schema_with_items(): "type": "object", "properties": {"fruits": {"type": "array", "items": {"type": "string"}}}, } - schema_plus = SchemaPlus.from_dict(schema) + schema_plus = Schema.from_dict(schema) assert schema_plus.to_dict() == schema - assert isinstance(schema_plus.properties["fruits"], SchemaPlus) + assert isinstance(schema_plus.properties["fruits"], Schema) fruits = schema_plus.properties["fruits"] - assert isinstance(fruits.items, SchemaPlus) + assert isinstance(fruits.items, Schema) assert fruits.items.type == "string" diff --git a/tests/core/test_singer.py b/tests/core/test_singer.py deleted file mode 100644 index aa7ef2a66..000000000 --- a/tests/core/test_singer.py +++ /dev/null @@ -1,118 +0,0 @@ -from typing import List, Optional - -import pytest - -from singer_sdk.helpers._singer import Catalog, CatalogEntry, Metadata, MetadataMapping - - -def test_catalog_parsing(): - """Validate parsing works for a catalog and its stream entries.""" - catalog_dict = { - "streams": [ - { - "tap_stream_id": "test", - "metadata": [ - { - "breadcrumb": [], - "metadata": { - "inclusion": "available", - }, - }, - { - "breadcrumb": ["properties", "a"], - "metadata": { - "inclusion": "unsupported", - }, - }, - ], - "schema": { - "type": "object", - }, - }, - ], - } - catalog = Catalog.from_dict(catalog_dict) - - assert catalog.streams[0].tap_stream_id == "test" - assert catalog.get_stream("test").tap_stream_id == "test" - assert catalog["test"].metadata.to_list() == catalog_dict["streams"][0]["metadata"] - assert catalog["test"].tap_stream_id == catalog_dict["streams"][0]["tap_stream_id"] - assert catalog["test"].schema.to_dict() == {"type": "object"} - assert catalog.to_dict() == catalog_dict - - new = { - "tap_stream_id": "new", - "metadata": [], - "schema": {}, - } - entry = CatalogEntry.from_dict(new) - catalog.add_stream(entry) - assert catalog.get_stream("new") == entry - - -@pytest.mark.parametrize( - "schema,key_properties,replication_method,valid_replication_keys", - [ - ( - {"properties": {"id": {"type": "integer"}}, "type": "object"}, - ["id"], - "FULL_TABLE", - None, - ), - ( - { - "properties": { - "first_name": {"type": "string"}, - "last_name": {"type": "string"}, - }, - "type": "object", - }, - ["first_name", "last_name"], - "INCREMENTAL", - ["updated_at"], - ), - ( - { - "properties": { - "first_name": {"type": "string"}, - "last_name": {"type": "string"}, - }, - "type": "object", - }, - ["first_name", "last_name"], - "FULL_TABLE", - None, - ), - ( - {}, - [], - None, - None, - ), - ], -) -def test_standard_metadata( - schema: dict, - key_properties: List[str], - replication_method: Optional[str], - valid_replication_keys: Optional[List[str]], -): - """Validate generated metadata.""" - metadata = MetadataMapping.get_standard_metadata( - schema=schema, - schema_name="test", - key_properties=key_properties, - replication_method=replication_method, - valid_replication_keys=valid_replication_keys, - ) - - stream_metadata = metadata[()] - assert stream_metadata.table_key_properties == key_properties - assert stream_metadata.forced_replication_method == replication_method - assert stream_metadata.valid_replication_keys == valid_replication_keys - assert stream_metadata.selected is None - - for pk in key_properties: - pk_metadata = metadata[("properties", pk)] - assert pk_metadata.inclusion == Metadata.InclusionType.AUTOMATIC - assert pk_metadata.selected is None diff --git a/tests/core/test_singer_messages.py b/tests/core/test_singer_messages.py index 3858731a9..a257be4fe 100644 --- a/tests/core/test_singer_messages.py +++ b/tests/core/test_singer_messages.py @@ -1,9 +1,7 @@ -from dataclasses import asdict - import pytest +from singer_sdk._singerlib import SingerMessageType from singer_sdk.helpers._batch import JSONLinesEncoding, SDKBatchMessage -from singer_sdk.helpers._singer import SingerMessageType @pytest.mark.parametrize( @@ -34,7 +32,7 @@ def test_batch_message_as_dict(message, expected): """Test batch message as dict.""" - dumped = message.asdict() + dumped = message.to_dict() assert dumped == expected assert message.from_dict(dumped) == message diff --git a/tests/core/test_sqlite.py b/tests/core/test_sqlite.py index b6a24fd77..5e76d8168 100644 --- a/tests/core/test_sqlite.py +++ b/tests/core/test_sqlite.py @@ -16,7 +16,7 @@ from samples.sample_target_sqlite import SQLiteSink, SQLiteTarget from singer_sdk import SQLStream from singer_sdk import typing as th -from singer_sdk.helpers._singer import Catalog, MetadataMapping, StreamMetadata +from singer_sdk._singerlib import Catalog, MetadataMapping, StreamMetadata from singer_sdk.tap_base import SQLTap from singer_sdk.target_base import SQLTarget from singer_sdk.testing import ( diff --git a/tests/core/test_streams.py b/tests/core/test_streams.py index 8945ca996..fd226c456 100644 --- a/tests/core/test_streams.py +++ b/tests/core/test_streams.py @@ -9,8 +9,8 @@ import pytest import requests +from singer_sdk._singerlib import Catalog, MetadataMapping from singer_sdk.helpers._classproperty import classproperty -from singer_sdk.helpers._singer import Catalog, MetadataMapping from singer_sdk.helpers.jsonpath import _compile_jsonpath, extract_jsonpath from singer_sdk.pagination import first from singer_sdk.streams.core import ( diff --git a/tests/external/test_gitlab_sync.py b/tests/external/test_gitlab_sync.py index 6ed2f2644..d2a3e9f55 100644 --- a/tests/external/test_gitlab_sync.py +++ b/tests/external/test_gitlab_sync.py @@ -2,8 +2,8 @@ from typing import Optional from samples.sample_tap_gitlab.gitlab_tap import SampleTapGitlab +from singer_sdk._singerlib import Catalog from singer_sdk.helpers import _catalog -from singer_sdk.helpers._singer import Catalog COUNTER = 0 SAMPLE_CONFIG_BAD = {"not": "correct"} From 14acaa42201f3b5b83486df18da761a794b4bde1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= <edgarrm358@gmail.com> Date: Mon, 19 Sep 2022 15:06:07 -0500 Subject: [PATCH 2/5] Rename singer_sdk._singerlib -> singer_sdk.singer --- .flake8 | 2 +- samples/sample_mapper/mapper.py | 2 +- singer_sdk/helpers/_batch.py | 2 +- singer_sdk/helpers/_catalog.py | 2 +- singer_sdk/io_base.py | 2 +- singer_sdk/mapper.py | 2 +- singer_sdk/mapper_base.py | 2 +- singer_sdk/{_singerlib => singer}/__init__.py | 8 +++++--- singer_sdk/{_singerlib => singer}/catalog.py | 4 +++- singer_sdk/{_singerlib => singer}/messages.py | 0 singer_sdk/{_singerlib => singer}/schema.py | 0 singer_sdk/streams/core.py | 2 +- singer_sdk/streams/rest.py | 2 +- singer_sdk/streams/sql.py | 2 +- singer_sdk/tap_base.py | 4 ++-- singer_sdk/testing.py | 2 +- tests/core/test_catalog_selection.py | 2 +- tests/core/test_mapper.py | 2 +- tests/core/test_schema.py | 2 +- tests/core/test_singer_messages.py | 2 +- tests/core/test_sqlite.py | 2 +- tests/core/test_streams.py | 2 +- tests/external/test_gitlab_sync.py | 2 +- tests/{_singerlib => singer}/test_catalog.py | 2 +- tests/{_singerlib => singer}/test_messages.py | 4 ++-- tests/{_singerlib => singer}/test_schema.py | 2 +- 26 files changed, 32 insertions(+), 28 deletions(-) rename singer_sdk/{_singerlib => singer}/__init__.py (63%) rename singer_sdk/{_singerlib => singer}/catalog.py (99%) rename singer_sdk/{_singerlib => singer}/messages.py (100%) rename singer_sdk/{_singerlib => singer}/schema.py (100%) rename tests/{_singerlib => singer}/test_catalog.py (99%) rename tests/{_singerlib => singer}/test_messages.py (97%) rename tests/{_singerlib => singer}/test_schema.py (98%) diff --git a/.flake8 b/.flake8 index 1ae5c448f..d40cfdc73 100644 --- a/.flake8 +++ b/.flake8 @@ -12,7 +12,7 @@ per-file-ignores = # Don't require docstrings conventions in "meta" code # singer_sdk/helpers/_classproperty.py:D105 # Ignore unused imports in __init__.py files - singer_sdk/_singerlib/__init__.py:F401 + singer_sdk/singer/__init__.py:F401 max-complexity = 10 docstring-convention = google allow-star-arg-any = true diff --git a/samples/sample_mapper/mapper.py b/samples/sample_mapper/mapper.py index 2ad133383..fd1d39e62 100644 --- a/samples/sample_mapper/mapper.py +++ b/samples/sample_mapper/mapper.py @@ -3,8 +3,8 @@ from pathlib import PurePath from typing import Generator, List, Optional, Union -import singer_sdk._singerlib as singer import singer_sdk.typing as th +from singer_sdk import singer from singer_sdk.helpers._util import utc_now from singer_sdk.mapper import PluginMapper from singer_sdk.mapper_base import InlineMapper diff --git a/singer_sdk/helpers/_batch.py b/singer_sdk/helpers/_batch.py index 5bac888ba..d99707a32 100644 --- a/singer_sdk/helpers/_batch.py +++ b/singer_sdk/helpers/_batch.py @@ -10,7 +10,7 @@ import fs -from singer_sdk._singerlib.messages import Message, SingerMessageType +from singer_sdk.singer import Message, SingerMessageType if TYPE_CHECKING: from fs.base import FS diff --git a/singer_sdk/helpers/_catalog.py b/singer_sdk/helpers/_catalog.py index 70a883157..d02a7e325 100644 --- a/singer_sdk/helpers/_catalog.py +++ b/singer_sdk/helpers/_catalog.py @@ -6,8 +6,8 @@ from memoization import cached -from singer_sdk._singerlib import Catalog, SelectionMask from singer_sdk.helpers._typing import is_object_type +from singer_sdk.singer import Catalog, SelectionMask _MAX_LRU_CACHE = 500 diff --git a/singer_sdk/io_base.py b/singer_sdk/io_base.py index c919dffe4..44da3c7da 100644 --- a/singer_sdk/io_base.py +++ b/singer_sdk/io_base.py @@ -10,8 +10,8 @@ from typing import IO from typing import Counter as CounterType -from singer_sdk._singerlib import SingerMessageType from singer_sdk.helpers._compat import final +from singer_sdk.singer import SingerMessageType logger = logging.getLogger(__name__) diff --git a/singer_sdk/mapper.py b/singer_sdk/mapper.py index 48918def2..d71986d52 100644 --- a/singer_sdk/mapper.py +++ b/singer_sdk/mapper.py @@ -11,7 +11,6 @@ import logging from typing import Any, Callable -from singer_sdk._singerlib.catalog import Catalog from singer_sdk.exceptions import MapExpressionError, StreamMapConfigError from singer_sdk.helpers import _simpleeval as simpleeval from singer_sdk.helpers._catalog import get_selected_schema @@ -21,6 +20,7 @@ flatten_schema, get_flattening_options, ) +from singer_sdk.singer import Catalog from singer_sdk.typing import ( CustomType, IntegerType, diff --git a/singer_sdk/mapper_base.py b/singer_sdk/mapper_base.py index 0f7998a5d..dde836b15 100644 --- a/singer_sdk/mapper_base.py +++ b/singer_sdk/mapper_base.py @@ -6,7 +6,7 @@ import click -import singer_sdk._singerlib as singer +from singer_sdk import singer from singer_sdk.cli import common_options from singer_sdk.configuration._dict_config import merge_config_sources from singer_sdk.helpers._classproperty import classproperty diff --git a/singer_sdk/_singerlib/__init__.py b/singer_sdk/singer/__init__.py similarity index 63% rename from singer_sdk/_singerlib/__init__.py rename to singer_sdk/singer/__init__.py index bbf243df3..27354ae42 100644 --- a/singer_sdk/_singerlib/__init__.py +++ b/singer_sdk/singer/__init__.py @@ -1,4 +1,6 @@ -from singer_sdk._singerlib.catalog import ( +"""Singer Protocol package.""" + +from singer_sdk.singer.catalog import ( Catalog, CatalogEntry, Metadata, @@ -6,7 +8,7 @@ SelectionMask, StreamMetadata, ) -from singer_sdk._singerlib.messages import ( +from singer_sdk.singer.messages import ( ActivateVersionMessage, Message, RecordMessage, @@ -16,4 +18,4 @@ exclude_null_dict, write_message, ) -from singer_sdk._singerlib.schema import Schema +from singer_sdk.singer.schema import Schema diff --git a/singer_sdk/_singerlib/catalog.py b/singer_sdk/singer/catalog.py similarity index 99% rename from singer_sdk/_singerlib/catalog.py rename to singer_sdk/singer/catalog.py index 89dd789c5..60651e615 100644 --- a/singer_sdk/_singerlib/catalog.py +++ b/singer_sdk/singer/catalog.py @@ -1,3 +1,5 @@ +"""Singer catalog parsing.""" + from __future__ import annotations import enum @@ -5,7 +7,7 @@ import typing as t from dataclasses import dataclass, fields -from singer_sdk._singerlib.schema import Schema +from singer_sdk.singer.schema import Schema if t.TYPE_CHECKING: from typing_extensions import TypeAlias diff --git a/singer_sdk/_singerlib/messages.py b/singer_sdk/singer/messages.py similarity index 100% rename from singer_sdk/_singerlib/messages.py rename to singer_sdk/singer/messages.py diff --git a/singer_sdk/_singerlib/schema.py b/singer_sdk/singer/schema.py similarity index 100% rename from singer_sdk/_singerlib/schema.py rename to singer_sdk/singer/schema.py diff --git a/singer_sdk/streams/core.py b/singer_sdk/streams/core.py index 9c7d2e2d6..4ae3775f0 100644 --- a/singer_sdk/streams/core.py +++ b/singer_sdk/streams/core.py @@ -18,7 +18,7 @@ import pendulum import requests -import singer_sdk._singerlib as singer +from singer_sdk import singer from singer_sdk.exceptions import InvalidStreamSortException, MaxRecordsLimitException from singer_sdk.helpers._batch import ( BaseBatchFileEncoding, diff --git a/singer_sdk/streams/rest.py b/singer_sdk/streams/rest.py index 11c59e236..e96c92be9 100644 --- a/singer_sdk/streams/rest.py +++ b/singer_sdk/streams/rest.py @@ -13,7 +13,6 @@ import backoff import requests -from singer_sdk._singerlib import Schema from singer_sdk.authenticators import APIAuthenticatorBase, SimpleAuthenticator from singer_sdk.exceptions import FatalAPIError, RetriableAPIError from singer_sdk.helpers.jsonpath import extract_jsonpath @@ -24,6 +23,7 @@ SimpleHeaderPaginator, ) from singer_sdk.plugin_base import PluginBase as TapBaseClass +from singer_sdk.singer import Schema from singer_sdk.streams.core import Stream DEFAULT_PAGE_SIZE = 1000 diff --git a/singer_sdk/streams/sql.py b/singer_sdk/streams/sql.py index 83c808cb7..c8e0f6042 100644 --- a/singer_sdk/streams/sql.py +++ b/singer_sdk/streams/sql.py @@ -13,9 +13,9 @@ from sqlalchemy.engine.reflection import Inspector from singer_sdk import typing as th -from singer_sdk._singerlib import CatalogEntry, MetadataMapping, Schema from singer_sdk.exceptions import ConfigValidationError from singer_sdk.plugin_base import PluginBase as TapBaseClass +from singer_sdk.singer import CatalogEntry, MetadataMapping, Schema from singer_sdk.streams.core import Stream diff --git a/singer_sdk/tap_base.py b/singer_sdk/tap_base.py index 6873cd22e..ece623ed7 100644 --- a/singer_sdk/tap_base.py +++ b/singer_sdk/tap_base.py @@ -8,7 +8,6 @@ import click -from singer_sdk._singerlib import Catalog from singer_sdk.cli import common_options from singer_sdk.exceptions import MaxRecordsLimitException from singer_sdk.helpers import _state @@ -23,6 +22,7 @@ ) from singer_sdk.mapper import PluginMapper from singer_sdk.plugin_base import PluginBase +from singer_sdk.singer import Catalog from singer_sdk.streams import SQLStream, Stream STREAM_MAPS_CONFIG = "stream_maps" @@ -242,7 +242,7 @@ def _singer_catalog(self) -> Catalog: """Return a Catalog object. Returns: - :class:`singer_sdk._singerlib.Catalog`. + :class:`singer_sdk.singer.Catalog`. """ return Catalog( (stream.tap_stream_id, stream._singer_catalog_entry) diff --git a/singer_sdk/testing.py b/singer_sdk/testing.py index 84d575bae..94360ab83 100644 --- a/singer_sdk/testing.py +++ b/singer_sdk/testing.py @@ -4,7 +4,7 @@ from contextlib import redirect_stderr, redirect_stdout from typing import Callable, List, Optional, Tuple, Type, cast -import singer_sdk._singerlib as singer +from singer_sdk import singer from singer_sdk.mapper_base import InlineMapper from singer_sdk.tap_base import Tap from singer_sdk.target_base import Target diff --git a/tests/core/test_catalog_selection.py b/tests/core/test_catalog_selection.py index d5a94b371..1b3d52db0 100644 --- a/tests/core/test_catalog_selection.py +++ b/tests/core/test_catalog_selection.py @@ -5,7 +5,7 @@ import pytest -import singer_sdk._singerlib as singer +from singer_sdk import singer from singer_sdk.helpers._catalog import ( get_selected_schema, pop_deselected_record_properties, diff --git a/tests/core/test_mapper.py b/tests/core/test_mapper.py index ba9e0f8d8..6cd4acef9 100644 --- a/tests/core/test_mapper.py +++ b/tests/core/test_mapper.py @@ -7,10 +7,10 @@ import pytest -from singer_sdk._singerlib import Catalog from singer_sdk.exceptions import MapExpressionError from singer_sdk.helpers._catalog import get_selected_schema from singer_sdk.mapper import PluginMapper, RemoveRecordTransform, md5 +from singer_sdk.singer import Catalog from singer_sdk.streams.core import Stream from singer_sdk.tap_base import Tap from singer_sdk.typing import ( diff --git a/tests/core/test_schema.py b/tests/core/test_schema.py index 2834a91dd..ac6c0669a 100644 --- a/tests/core/test_schema.py +++ b/tests/core/test_schema.py @@ -29,7 +29,7 @@ 'anyOf' and' patternProperties' as SIMPLE even though they can contain schemas. """ -from singer_sdk._singerlib import Schema +from singer_sdk.singer import Schema def test_simple_schema(): diff --git a/tests/core/test_singer_messages.py b/tests/core/test_singer_messages.py index a257be4fe..2e294f7d5 100644 --- a/tests/core/test_singer_messages.py +++ b/tests/core/test_singer_messages.py @@ -1,7 +1,7 @@ import pytest -from singer_sdk._singerlib import SingerMessageType from singer_sdk.helpers._batch import JSONLinesEncoding, SDKBatchMessage +from singer_sdk.singer import SingerMessageType @pytest.mark.parametrize( diff --git a/tests/core/test_sqlite.py b/tests/core/test_sqlite.py index 5e76d8168..194df3d82 100644 --- a/tests/core/test_sqlite.py +++ b/tests/core/test_sqlite.py @@ -16,7 +16,7 @@ from samples.sample_target_sqlite import SQLiteSink, SQLiteTarget from singer_sdk import SQLStream from singer_sdk import typing as th -from singer_sdk._singerlib import Catalog, MetadataMapping, StreamMetadata +from singer_sdk.singer import Catalog, MetadataMapping, StreamMetadata from singer_sdk.tap_base import SQLTap from singer_sdk.target_base import SQLTarget from singer_sdk.testing import ( diff --git a/tests/core/test_streams.py b/tests/core/test_streams.py index fd226c456..f3db7ca34 100644 --- a/tests/core/test_streams.py +++ b/tests/core/test_streams.py @@ -9,10 +9,10 @@ import pytest import requests -from singer_sdk._singerlib import Catalog, MetadataMapping from singer_sdk.helpers._classproperty import classproperty from singer_sdk.helpers.jsonpath import _compile_jsonpath, extract_jsonpath from singer_sdk.pagination import first +from singer_sdk.singer import Catalog, MetadataMapping from singer_sdk.streams.core import ( REPLICATION_FULL_TABLE, REPLICATION_INCREMENTAL, diff --git a/tests/external/test_gitlab_sync.py b/tests/external/test_gitlab_sync.py index d2a3e9f55..25217fc6c 100644 --- a/tests/external/test_gitlab_sync.py +++ b/tests/external/test_gitlab_sync.py @@ -2,8 +2,8 @@ from typing import Optional from samples.sample_tap_gitlab.gitlab_tap import SampleTapGitlab -from singer_sdk._singerlib import Catalog from singer_sdk.helpers import _catalog +from singer_sdk.singer import Catalog COUNTER = 0 SAMPLE_CONFIG_BAD = {"not": "correct"} diff --git a/tests/_singerlib/test_catalog.py b/tests/singer/test_catalog.py similarity index 99% rename from tests/_singerlib/test_catalog.py rename to tests/singer/test_catalog.py index 26a64f887..d57332d43 100644 --- a/tests/_singerlib/test_catalog.py +++ b/tests/singer/test_catalog.py @@ -2,7 +2,7 @@ import pytest -from singer_sdk._singerlib import ( +from singer_sdk.singer import ( Catalog, CatalogEntry, Metadata, diff --git a/tests/_singerlib/test_messages.py b/tests/singer/test_messages.py similarity index 97% rename from tests/_singerlib/test_messages.py rename to tests/singer/test_messages.py index b183cbde0..c0d9bad4b 100644 --- a/tests/_singerlib/test_messages.py +++ b/tests/singer/test_messages.py @@ -5,8 +5,8 @@ import pytest from pytz import UTC, timezone -import singer_sdk._singerlib as singer -from singer_sdk._singerlib.messages import format_message +from singer_sdk import singer +from singer_sdk.singer.messages import format_message def test_exclude_null_dict(): diff --git a/tests/_singerlib/test_schema.py b/tests/singer/test_schema.py similarity index 98% rename from tests/_singerlib/test_schema.py rename to tests/singer/test_schema.py index 781d163cd..4644b1fda 100644 --- a/tests/_singerlib/test_schema.py +++ b/tests/singer/test_schema.py @@ -1,6 +1,6 @@ import pytest -from singer_sdk._singerlib import Schema +from singer_sdk.singer import Schema STRING_SCHEMA = Schema(type="string", maxLength=32) STRING_DICT = {"type": "string", "maxLength": 32} From 337bd8a53237fe2588e87b1b58724dd88094f9c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= <edgarrm358@gmail.com> Date: Mon, 19 Sep 2022 16:22:55 -0500 Subject: [PATCH 3/5] Revert "Rename singer_sdk._singerlib -> singer_sdk.singer" This reverts commit 14acaa42201f3b5b83486df18da761a794b4bde1. --- .flake8 | 2 +- samples/sample_mapper/mapper.py | 2 +- singer_sdk/{singer => _singerlib}/__init__.py | 8 +++----- singer_sdk/{singer => _singerlib}/catalog.py | 4 +--- singer_sdk/{singer => _singerlib}/messages.py | 0 singer_sdk/{singer => _singerlib}/schema.py | 0 singer_sdk/helpers/_batch.py | 2 +- singer_sdk/helpers/_catalog.py | 2 +- singer_sdk/io_base.py | 2 +- singer_sdk/mapper.py | 2 +- singer_sdk/mapper_base.py | 2 +- singer_sdk/streams/core.py | 2 +- singer_sdk/streams/rest.py | 2 +- singer_sdk/streams/sql.py | 2 +- singer_sdk/tap_base.py | 4 ++-- singer_sdk/testing.py | 2 +- tests/{singer => _singerlib}/test_catalog.py | 2 +- tests/{singer => _singerlib}/test_messages.py | 4 ++-- tests/{singer => _singerlib}/test_schema.py | 2 +- tests/core/test_catalog_selection.py | 2 +- tests/core/test_mapper.py | 2 +- tests/core/test_schema.py | 2 +- tests/core/test_singer_messages.py | 2 +- tests/core/test_sqlite.py | 2 +- tests/core/test_streams.py | 2 +- tests/external/test_gitlab_sync.py | 2 +- 26 files changed, 28 insertions(+), 32 deletions(-) rename singer_sdk/{singer => _singerlib}/__init__.py (63%) rename singer_sdk/{singer => _singerlib}/catalog.py (99%) rename singer_sdk/{singer => _singerlib}/messages.py (100%) rename singer_sdk/{singer => _singerlib}/schema.py (100%) rename tests/{singer => _singerlib}/test_catalog.py (99%) rename tests/{singer => _singerlib}/test_messages.py (97%) rename tests/{singer => _singerlib}/test_schema.py (98%) diff --git a/.flake8 b/.flake8 index d40cfdc73..1ae5c448f 100644 --- a/.flake8 +++ b/.flake8 @@ -12,7 +12,7 @@ per-file-ignores = # Don't require docstrings conventions in "meta" code # singer_sdk/helpers/_classproperty.py:D105 # Ignore unused imports in __init__.py files - singer_sdk/singer/__init__.py:F401 + singer_sdk/_singerlib/__init__.py:F401 max-complexity = 10 docstring-convention = google allow-star-arg-any = true diff --git a/samples/sample_mapper/mapper.py b/samples/sample_mapper/mapper.py index fd1d39e62..2ad133383 100644 --- a/samples/sample_mapper/mapper.py +++ b/samples/sample_mapper/mapper.py @@ -3,8 +3,8 @@ from pathlib import PurePath from typing import Generator, List, Optional, Union +import singer_sdk._singerlib as singer import singer_sdk.typing as th -from singer_sdk import singer from singer_sdk.helpers._util import utc_now from singer_sdk.mapper import PluginMapper from singer_sdk.mapper_base import InlineMapper diff --git a/singer_sdk/singer/__init__.py b/singer_sdk/_singerlib/__init__.py similarity index 63% rename from singer_sdk/singer/__init__.py rename to singer_sdk/_singerlib/__init__.py index 27354ae42..bbf243df3 100644 --- a/singer_sdk/singer/__init__.py +++ b/singer_sdk/_singerlib/__init__.py @@ -1,6 +1,4 @@ -"""Singer Protocol package.""" - -from singer_sdk.singer.catalog import ( +from singer_sdk._singerlib.catalog import ( Catalog, CatalogEntry, Metadata, @@ -8,7 +6,7 @@ SelectionMask, StreamMetadata, ) -from singer_sdk.singer.messages import ( +from singer_sdk._singerlib.messages import ( ActivateVersionMessage, Message, RecordMessage, @@ -18,4 +16,4 @@ exclude_null_dict, write_message, ) -from singer_sdk.singer.schema import Schema +from singer_sdk._singerlib.schema import Schema diff --git a/singer_sdk/singer/catalog.py b/singer_sdk/_singerlib/catalog.py similarity index 99% rename from singer_sdk/singer/catalog.py rename to singer_sdk/_singerlib/catalog.py index 60651e615..89dd789c5 100644 --- a/singer_sdk/singer/catalog.py +++ b/singer_sdk/_singerlib/catalog.py @@ -1,5 +1,3 @@ -"""Singer catalog parsing.""" - from __future__ import annotations import enum @@ -7,7 +5,7 @@ import typing as t from dataclasses import dataclass, fields -from singer_sdk.singer.schema import Schema +from singer_sdk._singerlib.schema import Schema if t.TYPE_CHECKING: from typing_extensions import TypeAlias diff --git a/singer_sdk/singer/messages.py b/singer_sdk/_singerlib/messages.py similarity index 100% rename from singer_sdk/singer/messages.py rename to singer_sdk/_singerlib/messages.py diff --git a/singer_sdk/singer/schema.py b/singer_sdk/_singerlib/schema.py similarity index 100% rename from singer_sdk/singer/schema.py rename to singer_sdk/_singerlib/schema.py diff --git a/singer_sdk/helpers/_batch.py b/singer_sdk/helpers/_batch.py index d99707a32..5bac888ba 100644 --- a/singer_sdk/helpers/_batch.py +++ b/singer_sdk/helpers/_batch.py @@ -10,7 +10,7 @@ import fs -from singer_sdk.singer import Message, SingerMessageType +from singer_sdk._singerlib.messages import Message, SingerMessageType if TYPE_CHECKING: from fs.base import FS diff --git a/singer_sdk/helpers/_catalog.py b/singer_sdk/helpers/_catalog.py index d02a7e325..70a883157 100644 --- a/singer_sdk/helpers/_catalog.py +++ b/singer_sdk/helpers/_catalog.py @@ -6,8 +6,8 @@ from memoization import cached +from singer_sdk._singerlib import Catalog, SelectionMask from singer_sdk.helpers._typing import is_object_type -from singer_sdk.singer import Catalog, SelectionMask _MAX_LRU_CACHE = 500 diff --git a/singer_sdk/io_base.py b/singer_sdk/io_base.py index 44da3c7da..c919dffe4 100644 --- a/singer_sdk/io_base.py +++ b/singer_sdk/io_base.py @@ -10,8 +10,8 @@ from typing import IO from typing import Counter as CounterType +from singer_sdk._singerlib import SingerMessageType from singer_sdk.helpers._compat import final -from singer_sdk.singer import SingerMessageType logger = logging.getLogger(__name__) diff --git a/singer_sdk/mapper.py b/singer_sdk/mapper.py index d71986d52..48918def2 100644 --- a/singer_sdk/mapper.py +++ b/singer_sdk/mapper.py @@ -11,6 +11,7 @@ import logging from typing import Any, Callable +from singer_sdk._singerlib.catalog import Catalog from singer_sdk.exceptions import MapExpressionError, StreamMapConfigError from singer_sdk.helpers import _simpleeval as simpleeval from singer_sdk.helpers._catalog import get_selected_schema @@ -20,7 +21,6 @@ flatten_schema, get_flattening_options, ) -from singer_sdk.singer import Catalog from singer_sdk.typing import ( CustomType, IntegerType, diff --git a/singer_sdk/mapper_base.py b/singer_sdk/mapper_base.py index dde836b15..0f7998a5d 100644 --- a/singer_sdk/mapper_base.py +++ b/singer_sdk/mapper_base.py @@ -6,7 +6,7 @@ import click -from singer_sdk import singer +import singer_sdk._singerlib as singer from singer_sdk.cli import common_options from singer_sdk.configuration._dict_config import merge_config_sources from singer_sdk.helpers._classproperty import classproperty diff --git a/singer_sdk/streams/core.py b/singer_sdk/streams/core.py index 4ae3775f0..9c7d2e2d6 100644 --- a/singer_sdk/streams/core.py +++ b/singer_sdk/streams/core.py @@ -18,7 +18,7 @@ import pendulum import requests -from singer_sdk import singer +import singer_sdk._singerlib as singer from singer_sdk.exceptions import InvalidStreamSortException, MaxRecordsLimitException from singer_sdk.helpers._batch import ( BaseBatchFileEncoding, diff --git a/singer_sdk/streams/rest.py b/singer_sdk/streams/rest.py index e96c92be9..11c59e236 100644 --- a/singer_sdk/streams/rest.py +++ b/singer_sdk/streams/rest.py @@ -13,6 +13,7 @@ import backoff import requests +from singer_sdk._singerlib import Schema from singer_sdk.authenticators import APIAuthenticatorBase, SimpleAuthenticator from singer_sdk.exceptions import FatalAPIError, RetriableAPIError from singer_sdk.helpers.jsonpath import extract_jsonpath @@ -23,7 +24,6 @@ SimpleHeaderPaginator, ) from singer_sdk.plugin_base import PluginBase as TapBaseClass -from singer_sdk.singer import Schema from singer_sdk.streams.core import Stream DEFAULT_PAGE_SIZE = 1000 diff --git a/singer_sdk/streams/sql.py b/singer_sdk/streams/sql.py index c8e0f6042..83c808cb7 100644 --- a/singer_sdk/streams/sql.py +++ b/singer_sdk/streams/sql.py @@ -13,9 +13,9 @@ from sqlalchemy.engine.reflection import Inspector from singer_sdk import typing as th +from singer_sdk._singerlib import CatalogEntry, MetadataMapping, Schema from singer_sdk.exceptions import ConfigValidationError from singer_sdk.plugin_base import PluginBase as TapBaseClass -from singer_sdk.singer import CatalogEntry, MetadataMapping, Schema from singer_sdk.streams.core import Stream diff --git a/singer_sdk/tap_base.py b/singer_sdk/tap_base.py index ece623ed7..6873cd22e 100644 --- a/singer_sdk/tap_base.py +++ b/singer_sdk/tap_base.py @@ -8,6 +8,7 @@ import click +from singer_sdk._singerlib import Catalog from singer_sdk.cli import common_options from singer_sdk.exceptions import MaxRecordsLimitException from singer_sdk.helpers import _state @@ -22,7 +23,6 @@ ) from singer_sdk.mapper import PluginMapper from singer_sdk.plugin_base import PluginBase -from singer_sdk.singer import Catalog from singer_sdk.streams import SQLStream, Stream STREAM_MAPS_CONFIG = "stream_maps" @@ -242,7 +242,7 @@ def _singer_catalog(self) -> Catalog: """Return a Catalog object. Returns: - :class:`singer_sdk.singer.Catalog`. + :class:`singer_sdk._singerlib.Catalog`. """ return Catalog( (stream.tap_stream_id, stream._singer_catalog_entry) diff --git a/singer_sdk/testing.py b/singer_sdk/testing.py index 94360ab83..84d575bae 100644 --- a/singer_sdk/testing.py +++ b/singer_sdk/testing.py @@ -4,7 +4,7 @@ from contextlib import redirect_stderr, redirect_stdout from typing import Callable, List, Optional, Tuple, Type, cast -from singer_sdk import singer +import singer_sdk._singerlib as singer from singer_sdk.mapper_base import InlineMapper from singer_sdk.tap_base import Tap from singer_sdk.target_base import Target diff --git a/tests/singer/test_catalog.py b/tests/_singerlib/test_catalog.py similarity index 99% rename from tests/singer/test_catalog.py rename to tests/_singerlib/test_catalog.py index d57332d43..26a64f887 100644 --- a/tests/singer/test_catalog.py +++ b/tests/_singerlib/test_catalog.py @@ -2,7 +2,7 @@ import pytest -from singer_sdk.singer import ( +from singer_sdk._singerlib import ( Catalog, CatalogEntry, Metadata, diff --git a/tests/singer/test_messages.py b/tests/_singerlib/test_messages.py similarity index 97% rename from tests/singer/test_messages.py rename to tests/_singerlib/test_messages.py index c0d9bad4b..b183cbde0 100644 --- a/tests/singer/test_messages.py +++ b/tests/_singerlib/test_messages.py @@ -5,8 +5,8 @@ import pytest from pytz import UTC, timezone -from singer_sdk import singer -from singer_sdk.singer.messages import format_message +import singer_sdk._singerlib as singer +from singer_sdk._singerlib.messages import format_message def test_exclude_null_dict(): diff --git a/tests/singer/test_schema.py b/tests/_singerlib/test_schema.py similarity index 98% rename from tests/singer/test_schema.py rename to tests/_singerlib/test_schema.py index 4644b1fda..781d163cd 100644 --- a/tests/singer/test_schema.py +++ b/tests/_singerlib/test_schema.py @@ -1,6 +1,6 @@ import pytest -from singer_sdk.singer import Schema +from singer_sdk._singerlib import Schema STRING_SCHEMA = Schema(type="string", maxLength=32) STRING_DICT = {"type": "string", "maxLength": 32} diff --git a/tests/core/test_catalog_selection.py b/tests/core/test_catalog_selection.py index 1b3d52db0..d5a94b371 100644 --- a/tests/core/test_catalog_selection.py +++ b/tests/core/test_catalog_selection.py @@ -5,7 +5,7 @@ import pytest -from singer_sdk import singer +import singer_sdk._singerlib as singer from singer_sdk.helpers._catalog import ( get_selected_schema, pop_deselected_record_properties, diff --git a/tests/core/test_mapper.py b/tests/core/test_mapper.py index 6cd4acef9..ba9e0f8d8 100644 --- a/tests/core/test_mapper.py +++ b/tests/core/test_mapper.py @@ -7,10 +7,10 @@ import pytest +from singer_sdk._singerlib import Catalog from singer_sdk.exceptions import MapExpressionError from singer_sdk.helpers._catalog import get_selected_schema from singer_sdk.mapper import PluginMapper, RemoveRecordTransform, md5 -from singer_sdk.singer import Catalog from singer_sdk.streams.core import Stream from singer_sdk.tap_base import Tap from singer_sdk.typing import ( diff --git a/tests/core/test_schema.py b/tests/core/test_schema.py index ac6c0669a..2834a91dd 100644 --- a/tests/core/test_schema.py +++ b/tests/core/test_schema.py @@ -29,7 +29,7 @@ 'anyOf' and' patternProperties' as SIMPLE even though they can contain schemas. """ -from singer_sdk.singer import Schema +from singer_sdk._singerlib import Schema def test_simple_schema(): diff --git a/tests/core/test_singer_messages.py b/tests/core/test_singer_messages.py index 2e294f7d5..a257be4fe 100644 --- a/tests/core/test_singer_messages.py +++ b/tests/core/test_singer_messages.py @@ -1,7 +1,7 @@ import pytest +from singer_sdk._singerlib import SingerMessageType from singer_sdk.helpers._batch import JSONLinesEncoding, SDKBatchMessage -from singer_sdk.singer import SingerMessageType @pytest.mark.parametrize( diff --git a/tests/core/test_sqlite.py b/tests/core/test_sqlite.py index 194df3d82..5e76d8168 100644 --- a/tests/core/test_sqlite.py +++ b/tests/core/test_sqlite.py @@ -16,7 +16,7 @@ from samples.sample_target_sqlite import SQLiteSink, SQLiteTarget from singer_sdk import SQLStream from singer_sdk import typing as th -from singer_sdk.singer import Catalog, MetadataMapping, StreamMetadata +from singer_sdk._singerlib import Catalog, MetadataMapping, StreamMetadata from singer_sdk.tap_base import SQLTap from singer_sdk.target_base import SQLTarget from singer_sdk.testing import ( diff --git a/tests/core/test_streams.py b/tests/core/test_streams.py index f3db7ca34..fd226c456 100644 --- a/tests/core/test_streams.py +++ b/tests/core/test_streams.py @@ -9,10 +9,10 @@ import pytest import requests +from singer_sdk._singerlib import Catalog, MetadataMapping from singer_sdk.helpers._classproperty import classproperty from singer_sdk.helpers.jsonpath import _compile_jsonpath, extract_jsonpath from singer_sdk.pagination import first -from singer_sdk.singer import Catalog, MetadataMapping from singer_sdk.streams.core import ( REPLICATION_FULL_TABLE, REPLICATION_INCREMENTAL, diff --git a/tests/external/test_gitlab_sync.py b/tests/external/test_gitlab_sync.py index 25217fc6c..d2a3e9f55 100644 --- a/tests/external/test_gitlab_sync.py +++ b/tests/external/test_gitlab_sync.py @@ -2,8 +2,8 @@ from typing import Optional from samples.sample_tap_gitlab.gitlab_tap import SampleTapGitlab +from singer_sdk._singerlib import Catalog from singer_sdk.helpers import _catalog -from singer_sdk.singer import Catalog COUNTER = 0 SAMPLE_CONFIG_BAD = {"not": "correct"} From a0fbde0e214526ad625355f2bef6abf0dbc0df4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= <edgarrm358@gmail.com> Date: Mon, 19 Sep 2022 16:31:33 -0500 Subject: [PATCH 4/5] Use __future__.annotations in moved tests --- tests/_singerlib/test_catalog.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/_singerlib/test_catalog.py b/tests/_singerlib/test_catalog.py index 26a64f887..7c1fc04d7 100644 --- a/tests/_singerlib/test_catalog.py +++ b/tests/_singerlib/test_catalog.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from __future__ import annotations import pytest @@ -240,10 +240,10 @@ def test_catalog_parsing(): ) def test_standard_metadata( schema: dict, - key_properties: List[str], - replication_method: Optional[str], - valid_replication_keys: Optional[List[str]], - schema_name: Optional[str], + key_properties: list[str] | None, + replication_method: str | None, + valid_replication_keys: list[str] | None, + schema_name: str | None, ): """Validate generated metadata.""" metadata = MetadataMapping.get_standard_metadata( From 9498e40ebed0d5373c0e80ef218bfca637e82dc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= <edgarrm358@gmail.com> Date: Mon, 19 Sep 2022 18:46:11 -0500 Subject: [PATCH 5/5] Adapt singer.resolve_schema_references --- singer_sdk/_singerlib/__init__.py | 2 +- singer_sdk/_singerlib/schema.py | 64 ++++++++++++ tests/_singerlib/test_schema.py | 164 +++++++++++++++++++++++++++++- 3 files changed, 228 insertions(+), 2 deletions(-) diff --git a/singer_sdk/_singerlib/__init__.py b/singer_sdk/_singerlib/__init__.py index bbf243df3..76e039d92 100644 --- a/singer_sdk/_singerlib/__init__.py +++ b/singer_sdk/_singerlib/__init__.py @@ -16,4 +16,4 @@ exclude_null_dict, write_message, ) -from singer_sdk._singerlib.schema import Schema +from singer_sdk._singerlib.schema import Schema, resolve_schema_references diff --git a/singer_sdk/_singerlib/schema.py b/singer_sdk/_singerlib/schema.py index 09f9adc4f..2c4617b14 100644 --- a/singer_sdk/_singerlib/schema.py +++ b/singer_sdk/_singerlib/schema.py @@ -5,6 +5,8 @@ import typing as t from dataclasses import dataclass +from jsonschema import RefResolver + # These are keys defined in the JSON Schema spec that do not themselves contain # schemas (or lists of schemas) STANDARD_KEYS = [ @@ -105,3 +107,65 @@ def from_dict(cls: t.Type[Schema], data: dict, **schema_defaults: t.Any) -> Sche if key in data: kwargs[key] = data[key] return cls(**kwargs) + + +class _SchemaKey: + ref = "$ref" + items = "items" + properties = "properties" + pattern_properties = "patternProperties" + any_of = "anyOf" + + +def resolve_schema_references( + schema: dict[str, t.Any], + refs: dict[str, dict] | None = None, +) -> dict: + """Resolves and replaces json-schema $refs with the appropriate dict. + + Recursively walks the given schema dict, converting every instance of $ref in a + 'properties' structure with a resolved dict. + + This modifies the input schema and also returns it. + + Args: + schema: The schema dict + refs: A dict of <string, dict> which forms a store of referenced schemata. + + Returns: + A schema dict with all $refs replaced with the appropriate dict. + """ + refs = refs or {} + return _resolve_schema_references(schema, RefResolver("", schema, store=refs)) + + +def _resolve_schema_references( + schema: dict[str, t.Any], + resolver: RefResolver, +) -> dict[str, t.Any]: + if _SchemaKey.ref in schema: + reference_path = schema.pop(_SchemaKey.ref, None) + resolved = resolver.resolve(reference_path)[1] + schema.update(resolved) + return _resolve_schema_references(schema, resolver) + + if _SchemaKey.properties in schema: + for k, val in schema[_SchemaKey.properties].items(): + schema[_SchemaKey.properties][k] = _resolve_schema_references(val, resolver) + + if _SchemaKey.pattern_properties in schema: + for k, val in schema[_SchemaKey.pattern_properties].items(): + schema[_SchemaKey.pattern_properties][k] = _resolve_schema_references( + val, resolver + ) + + if _SchemaKey.items in schema: + schema[_SchemaKey.items] = _resolve_schema_references( + schema[_SchemaKey.items], resolver + ) + + if _SchemaKey.any_of in schema: + for i, element in enumerate(schema[_SchemaKey.any_of]): + schema[_SchemaKey.any_of][i] = _resolve_schema_references(element, resolver) + + return schema diff --git a/tests/_singerlib/test_schema.py b/tests/_singerlib/test_schema.py index 781d163cd..17d027c6b 100644 --- a/tests/_singerlib/test_schema.py +++ b/tests/_singerlib/test_schema.py @@ -1,6 +1,6 @@ import pytest -from singer_sdk._singerlib import Schema +from singer_sdk._singerlib import Schema, resolve_schema_references STRING_SCHEMA = Schema(type="string", maxLength=32) STRING_DICT = {"type": "string", "maxLength": 32} @@ -84,3 +84,165 @@ def test_schema_to_dict(schema, expected): ) def test_schema_from_dict(pydict, expected): assert Schema.from_dict(pydict) == expected + + +@pytest.mark.parametrize( + "schema,refs,expected", + [ + pytest.param( + { + "type": "object", + "definitions": {"string_type": {"type": "string"}}, + "properties": {"name": {"$ref": "#/definitions/string_type"}}, + }, + None, + { + "type": "object", + "definitions": {"string_type": {"type": "string"}}, + "properties": {"name": {"type": "string"}}, + }, + id="resolve_schema_references", + ), + pytest.param( + { + "type": "object", + "properties": { + "name": {"$ref": "references.json#/definitions/string_type"} + }, + }, + {"references.json": {"definitions": {"string_type": {"type": "string"}}}}, + { + "type": "object", + "properties": {"name": {"type": "string"}}, + }, + id="resolve_schema_references_with_refs", + ), + pytest.param( + { + "type": "object", + "definitions": {"string_type": {"type": "string"}}, + "patternProperties": {".+": {"$ref": "#/definitions/string_type"}}, + }, + None, + { + "type": "object", + "definitions": {"string_type": {"type": "string"}}, + "patternProperties": {".+": {"type": "string"}}, + }, + id="resolve_schema_references_with_pattern_properties", + ), + pytest.param( + { + "type": "object", + "properties": { + "dogs": {"type": "array", "items": {"$ref": "doggie.json#/dogs"}} + }, + }, + { + "doggie.json": { + "dogs": { + "type": "object", + "properties": { + "breed": {"type": "string"}, + "name": {"type": "string"}, + }, + } + } + }, + { + "type": "object", + "properties": { + "dogs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "breed": {"type": "string"}, + "name": {"type": "string"}, + }, + }, + } + }, + }, + id="resolve_schema_references_with_items", + ), + pytest.param( + { + "type": "object", + "properties": { + "thing": { + "type": "object", + "properties": { + "name": {"$ref": "references.json#/definitions/string_type"} + }, + } + }, + }, + {"references.json": {"definitions": {"string_type": {"type": "string"}}}}, + { + "type": "object", + "properties": { + "thing": { + "type": "object", + "properties": {"name": {"type": "string"}}, + } + }, + }, + id="resolve_schema_nested_references", + ), + pytest.param( + { + "type": "object", + "properties": { + "name": {"$ref": "references.json#/definitions/string_type"} + }, + }, + { + "references.json": { + "definitions": {"string_type": {"$ref": "second_reference.json"}} + }, + "second_reference.json": {"type": "string"}, + }, + {"type": "object", "properties": {"name": {"type": "string"}}}, + id="resolve_schema_indirect_references", + ), + pytest.param( + { + "type": "object", + "properties": { + "name": { + "$ref": "references.json#/definitions/string_type", + "still_here": "yep", + } + }, + }, + {"references.json": {"definitions": {"string_type": {"type": "string"}}}}, + { + "type": "object", + "properties": {"name": {"type": "string", "still_here": "yep"}}, + }, + id="resolve_schema_preserves_existing_fields", + ), + pytest.param( + { + "anyOf": [ + {"$ref": "references.json#/definitions/first_type"}, + {"$ref": "references.json#/definitions/second_type"}, + ] + }, + { + "references.json": { + "definitions": { + "first_type": {"type": "string"}, + "second_type": {"type": "integer"}, + } + } + }, + {"anyOf": [{"type": "string"}, {"type": "integer"}]}, + id="resolve_schema_any_of", + ), + ], +) +def test_resolve_schema_references(schema, refs, expected): + """Test resolving schema references.""" + assert resolve_schema_references(schema, refs) == expected