Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enable Taps and Targets to generate their own Meltano yaml files #1094

Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9f56e96
chore: initial refactor for readability
aaronsteers Oct 20, 2022
b85760a
feat: add Meltano rendering logic in private helper module
aaronsteers Oct 20, 2022
8d95b22
feat: add `secret=True` support in JSON Schema type helpers
aaronsteers Oct 20, 2022
2820b91
change: update examples to use 'secret=True' for protected settings
aaronsteers Oct 20, 2022
e74be3f
chore: flake8 fix
aaronsteers Oct 20, 2022
e0239b5
add unit tests for type helpers
aaronsteers Oct 20, 2022
17905b1
fix missing secret flag on unit test
aaronsteers Oct 20, 2022
9d6a364
chore: get tests passing
aaronsteers Oct 20, 2022
8ad7727
chore: add test for description
aaronsteers Oct 20, 2022
2792aa1
chore: remove commented code
aaronsteers Oct 20, 2022
b86cfc4
chore: remove files related to #1094
aaronsteers Oct 20, 2022
6887faf
chore: dummy commit
aaronsteers Oct 20, 2022
05edd84
Merge branch '77-feat-secrets-support-in-config-and-streams' into 135…
aaronsteers Oct 20, 2022
2a87092
chore: add back files
aaronsteers Oct 20, 2022
0bf2fb8
chore: revert dummy change
aaronsteers Oct 20, 2022
2b83266
chore: revert --about updates
aaronsteers Oct 20, 2022
f8c734a
Merge branch 'main' into 77-feat-secrets-support-in-config-and-streams
aaronsteers Oct 20, 2022
28be9cb
Merge branch '77-feat-secrets-support-in-config-and-streams' into 135…
aaronsteers Oct 20, 2022
3cb21fc
chore: reapply updates
aaronsteers Oct 20, 2022
3018690
move to new `singer-tools` helper CLI
aaronsteers Oct 21, 2022
122e8eb
tweak cli defs
aaronsteers Oct 21, 2022
1bfb60a
chore: remove stray file
aaronsteers Oct 21, 2022
23aae72
chore: make file callable
aaronsteers Oct 21, 2022
78d8aee
merge from origin/main
aaronsteers Oct 21, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class Tap{{ cookiecutter.source_name }}({{ 'SQL' if cookiecutter.stream_type ==
"auth_token",
th.StringType,
required=True,
secret=True, # Flag config as protected.
description="The token to authenticate against the API service"
),
th.Property(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class Target{{ cookiecutter.destination_name }}({{ target_class }}):
th.Property(
"sqlalchemy_url",
th.StringType,
secret=True, # Flag config as protected.
description="SQLAlchemy connection string",
),
{%- else %}
Expand All @@ -34,6 +35,12 @@ class Target{{ cookiecutter.destination_name }}({{ target_class }}):
th.StringType,
description="The scheme with which output files will be named"
),
th.Property(
"auth_token",
th.StringType,
secret=True, # Flag config as protected.
description="The path to the target output file"
),
{%- endif %}
).to_dict()

Expand Down
2 changes: 1 addition & 1 deletion samples/sample_tap_gitlab/gitlab_tap.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class SampleTapGitlab(Tap):

name: str = "sample-tap-gitlab"
config_jsonschema = PropertiesList(
Property("auth_token", StringType, required=True),
Property("auth_token", StringType, required=True, secret=True),
Property("project_ids", ArrayType(StringType), required=True),
Property("group_ids", ArrayType(StringType), required=True),
Property("start_date", DateTimeType, required=True),
Expand Down
2 changes: 1 addition & 1 deletion samples/sample_tap_google_analytics/ga_tap.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class SampleTapGoogleAnalytics(Tap):
config_jsonschema = PropertiesList(
Property("view_id", StringType(), required=True),
Property("client_email", StringType(), required=True),
Property("private_key", StringType(), required=True),
Property("private_key", StringType(), required=True, secret=True),
).to_dict()

def discover_streams(self) -> List[SampleGoogleAnalyticsStream]:
Expand Down
110 changes: 110 additions & 0 deletions singer_sdk/helpers/_meltano.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""Helper functions for Meltano and MeltanoHub interop."""

from __future__ import annotations

import json

from ._typing import (
    is_array_type,
    is_boolean_type,
    is_datetime_type,
    is_integer_type,
    is_object_type,
    is_secret_type,
    is_string_type,
)


def _to_meltano_kind(jsonschema_type: dict) -> str | None:
    """Return the Meltano setting `kind` for the provided JSON Schema type.

    The checks run in priority order and the first match wins: secret,
    date-time, string, object, array, boolean, integer.

    For reference:
    https://docs.meltano.com/reference/plugin-definition-syntax#settingskind

    Args:
        jsonschema_type: JSON Schema type definition (the full property
            schema dict, not just the value of its `type` key).

    Returns:
        A string naming the Meltano `kind`, or None if no check matched.
    """
    # Secrets win over every other classification so they are always masked.
    if is_secret_type(jsonschema_type):
        return "password"

    # Must run before the string check: in JSON Schema a date-time is a
    # string carrying `format: date-time`, so testing for "string" first
    # would make this branch unreachable.
    # NOTE(review): is_datetime_type is defined outside this view; presumably
    # it keys off the `format` annotation - confirm.
    if is_datetime_type(jsonschema_type):
        return "date_iso8601"

    if is_string_type(jsonschema_type):
        return "string"

    if is_object_type(jsonschema_type):
        return "object"

    if is_array_type(jsonschema_type):
        return "array"

    if is_boolean_type(jsonschema_type):
        return "boolean"

    if is_integer_type(jsonschema_type):
        return "integer"

    return None


def meltano_yaml_str(
    plugin_name: str,
    capabilities: list[str],
    config_jsonschema: dict,
) -> str:
    """Return a Meltano plugin definition as a yaml string.

    The YAML is assembled with plain string formatting. Setting descriptions
    are emitted as JSON-encoded (double-quoted) scalars so that colons, quotes
    and newlines cannot break the document; every other value is interpolated
    verbatim.

    Args:
        plugin_name: Name of the plugin.
        capabilities: List of capabilities.
        config_jsonschema: JSON Schema of the expected config. Settings are
            read from its `properties` mapping and required settings from its
            top-level `required` list (or a literal `required: true` flag on
            the individual property).

    Returns:
        A string representing the Meltano plugin Yaml definition.
    """
    properties: dict = config_jsonschema.get("properties", {})
    required_names = config_jsonschema.get("required", [])

    capability_lines = [f"- {capability}" for capability in capabilities]

    setting_lines: list[str] = []
    for setting_name, type_dict in properties.items():
        description = type_dict.get("description")
        setting_lines.extend(
            [
                f"- name: {setting_name}",
                f"  label: {setting_name.replace('_', ' ').title()}",
                # Pass the whole property schema: secret/format detection
                # needs more than the bare `type` value.
                f"  kind: {_to_meltano_kind(type_dict) or 'null'}",
                "  description: "
                + ("null" if description is None else json.dumps(description)),
            ]
        )

    required_settings = [
        setting_name
        for setting_name, type_dict in properties.items()
        # `is True` (not truthiness): on object-typed properties `required`
        # is a list of nested property names, not a flag for the setting.
        if setting_name in required_names or type_dict.get("required") is True
    ]
    # One validation group (a list nested in a list) holding every required
    # setting: the first entry opens the inner list, the rest continue it.
    validation_lines = [
        ("- - " if position == 0 else "  - ") + setting_name
        for position, setting_name in enumerate(required_settings)
    ]

    return "\n".join(
        [
            "",
            f"name: {plugin_name}",
            f"namespace: {plugin_name.replace('-', '_')}",
            "",
            "## The following could not be auto-detected:",
            "# maintenance_status: #",
            "# repo: #",
            "# variant: #",
            "# label: #",
            "# description: #",
            "# pip_url: #",
            "# domain_url: #",
            "# logo_url: #",
            "# keywords: [] #",
            "",
            _yaml_section("capabilities", capability_lines),
            _yaml_section("settings_group_validation", validation_lines),
            _yaml_section("settings", setting_lines),
            "",
        ]
    )


def _yaml_section(key: str, item_lines: list[str]) -> str:
    """Render `key:` followed by its list lines, or `key: []` when empty."""
    if not item_lines:
        return f"{key}: []"
    return "\n".join([f"{key}:", *item_lines])
61 changes: 61 additions & 0 deletions singer_sdk/helpers/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,40 @@ def append_type(type_dict: dict, new_type: str) -> dict:
)


def is_secret_type(type_dict: dict) -> bool:
    """Report whether a JSON Schema type definition looks like a secret.

    A definition counts as secret when `writeOnly` or `sensitive` is truthy
    on the definition itself or, recursively, on any entry of its
    `properties` mapping.

    Args:
        type_dict: The JSON Schema type to check.

    Raises:
        ValueError: If type_dict (or a nested property definition that gets
            inspected) is None or empty.

    Returns:
        True if any sensitive property node is detected, otherwise False.
    """
    if not type_dict:
        raise ValueError(
            "Could not detect type from empty type_dict. "
            "Did you forget to define a property in the stream schema?"
        )

    flagged_here = type_dict.get("writeOnly") or type_dict.get("sensitive")
    if flagged_here:
        return True

    if "properties" not in type_dict:
        return False

    # Walk the children and stop at the first one that is itself secret.
    for child_schema in type_dict["properties"].values():
        if is_secret_type(child_schema):
            return True
    return False


def is_object_type(property_schema: dict) -> Optional[bool]:
"""Return true if the JSON Schema type is an object or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand Down Expand Up @@ -152,6 +186,23 @@ def is_string_array_type(type_dict: dict) -> bool:
return "array" in type_dict["type"] and bool(is_string_type(type_dict["items"]))


def is_array_type(type_dict: dict) -> bool:
    """Return True if the JSON Schema type definition is an array.

    The item type is not inspected, so this matches arrays of any element
    type. For an `anyOf` definition the result is True as soon as one member
    is an array.

    Args:
        type_dict: The JSON Schema type to check.

    Raises:
        ValueError: If type_dict is None or empty, or if it declares neither
            `anyOf` nor `type`.

    Returns:
        True if "array" is the declared type or one of the declared types.
    """
    if not type_dict:
        raise ValueError(
            "Could not detect type from empty type_dict. "
            "Did you forget to define a property in the stream schema?"
        )

    if "anyOf" in type_dict:
        # Generator rather than a list: evaluation stops at the first array
        # member instead of also validating every remaining member.
        return any(is_array_type(member) for member in type_dict["anyOf"])

    if "type" not in type_dict:
        raise ValueError(f"Could not detect type from schema '{type_dict}'")

    # `type` may be a single name or a list such as ["array", "null"].
    return "array" in type_dict["type"]


def is_boolean_type(property_schema: dict) -> Optional[bool]:
"""Return true if the JSON Schema type is a boolean or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand All @@ -162,6 +213,16 @@ def is_boolean_type(property_schema: dict) -> Optional[bool]:
return False


def is_integer_type(property_schema: dict) -> Optional[bool]:
    """Return true if the JSON Schema type is an integer or None if detection fails.

    Args:
        property_schema: The JSON Schema type definition to check.

    Returns:
        None when the schema declares neither `anyOf` nor `type`; otherwise
        True if "integer" is the declared type, one of the declared types, or
        the type of any `anyOf` member, and False if not.
    """
    if "anyOf" not in property_schema and "type" not in property_schema:
        return None  # Could not detect data type

    if "anyOf" in property_schema:
        # Members are schemas in their own right, so recurse into each one;
        # a membership test on the member dict would only look at its keys.
        return any(is_integer_type(member) for member in property_schema["anyOf"])

    declared_type = property_schema["type"]
    if isinstance(declared_type, str):
        return declared_type == "integer"
    # Otherwise a list of type names such as ["integer", "null"].
    return "integer" in declared_type


def is_string_type(property_schema: dict) -> Optional[bool]:
"""Return true if the JSON Schema type is a boolean or None if detection fails."""
if "anyOf" not in property_schema and "type" not in property_schema:
Expand Down
120 changes: 68 additions & 52 deletions singer_sdk/plugin_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from singer_sdk.exceptions import ConfigValidationError
from singer_sdk.helpers._classproperty import classproperty
from singer_sdk.helpers._compat import metadata
from singer_sdk.helpers._meltano import meltano_yaml_str
from singer_sdk.helpers._secrets import SecretString, is_common_secret_key
from singer_sdk.helpers._util import read_json_file
from singer_sdk.helpers.capabilities import (
Expand Down Expand Up @@ -341,64 +342,79 @@ def print_about(cls: Type["PluginBase"], format: Optional[str] = None) -> None:

if format == "json":
print(json.dumps(info, indent=2, default=str))
return

elif format == "markdown":
max_setting_len = cast(
int, max(len(k) for k in info["settings"]["properties"].keys())
)
if format == "markdown":
cls._print_about_markdown(info)
return

# Set table base for markdown
table_base = (
f"| {'Setting':{max_setting_len}}| Required | Default | Description |\n"
f"|:{'-' * max_setting_len}|:--------:|:-------:|:------------|\n"
)
if format == "meltano":
print(meltano_yaml_str(cls.name, cls.capabilities, cls.config_jsonschema))
return

# Empty list for string parts
md_list = []
# Get required settings for table
required_settings = info["settings"].get("required", [])
formatted = "\n".join([f"{k.title()}: {v}" for k, v in info.items()])
print(formatted)

# Iterate over Dict to set md
md_list.append(
f"# `{info['name']}`\n\n"
f"{info['description']}\n\n"
f"Built with the [Meltano Singer SDK](https://sdk.meltano.com).\n\n"
)
for key, value in info.items():

if key == "capabilities":
capabilities = f"## {key.title()}\n\n"
capabilities += "\n".join([f"* `{v}`" for v in value])
capabilities += "\n\n"
md_list.append(capabilities)

if key == "settings":
setting = f"## {key.title()}\n\n"
for k, v in info["settings"].get("properties", {}).items():
md_description = v.get("description", "").replace("\n", "<BR/>")
table_base += (
f"| {k}{' ' * (max_setting_len - len(k))}"
f"| {'True' if k in required_settings else 'False':8} | "
f"{v.get('default', 'None'):7} | "
f"{md_description:11} |\n"
)
setting += table_base
setting += (
"\n"
+ "\n".join(
[
"A full list of supported settings and capabilities "
f"is available by running: `{info['name']} --about`"
]
)
+ "\n"
@classmethod
def _print_about_markdown(cls: Type["PluginBase"], info: dict) -> None:
"""Print about info as markdown.

Args:
info: The collected metadata for the class.
"""
max_setting_len = cast(
int, max(len(k) for k in info["settings"]["properties"].keys())
)

# Set table base for markdown
table_base = (
f"| {'Setting':{max_setting_len}}| Required | Default | Description |\n"
f"|:{'-' * max_setting_len}|:--------:|:-------:|:------------|\n"
)

# Empty list for string parts
md_list = []
# Get required settings for table
required_settings = info["settings"].get("required", [])

# Iterate over Dict to set md
md_list.append(
f"# `{info['name']}`\n\n"
f"{info['description']}\n\n"
f"Built with the [Meltano Singer SDK](https://sdk.meltano.com).\n\n"
)
for key, value in info.items():

if key == "capabilities":
capabilities = f"## {key.title()}\n\n"
capabilities += "\n".join([f"* `{v}`" for v in value])
capabilities += "\n\n"
md_list.append(capabilities)

if key == "settings":
setting = f"## {key.title()}\n\n"
for k, v in info["settings"].get("properties", {}).items():
md_description = v.get("description", "").replace("\n", "<BR/>")
table_base += (
f"| {k}{' ' * (max_setting_len - len(k))}"
f"| {'True' if k in required_settings else 'False':8} | "
f"{v.get('default', 'None'):7} | "
f"{md_description:11} |\n"
)
setting += table_base
setting += (
"\n"
+ "\n".join(
[
"A full list of supported settings and capabilities "
f"is available by running: `{info['name']} --about`"
]
)
md_list.append(setting)
+ "\n"
)
md_list.append(setting)

print("".join(md_list))
else:
formatted = "\n".join([f"{k.title()}: {v}" for k, v in info.items()])
print(formatted)
print("".join(md_list))

@classproperty
def cli(cls) -> Callable:
Expand Down
Loading