Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add endpoint to fetch available DBs #14208

Merged
merged 2 commits into from
Apr 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,18 @@ class CeleryConfig: # pylint: disable=too-few-public-methods
"postgresql": "PostgreSQLValidator",
}

# A list of preferred databases, in order. These databases will be
# displayed prominently in the "Add Database" dialog. You should
# use the "engine" attribute of the corresponding DB engine spec in
# `superset/db_engine_specs/`.
PREFERRED_DATABASES: List[str] = [
# "postgresql",
# "presto",
# "mysql",
# "sqlite",
# etc.
]

# Do you want Talisman enabled?
TALISMAN_ENABLED = False
# If you want Talisman, how do you want it configured??
Expand Down
72 changes: 69 additions & 3 deletions superset/databases/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import logging
from datetime import datetime
from io import BytesIO
from typing import Any, Optional
from typing import Any, Dict, List, Optional
from zipfile import ZipFile

from flask import g, request, Response, send_file
Expand All @@ -27,7 +27,7 @@
from marshmallow import ValidationError
from sqlalchemy.exc import NoSuchTableError, OperationalError, SQLAlchemyError

from superset import event_logger
from superset import app, event_logger
from superset.commands.exceptions import CommandInvalidError
from superset.commands.importers.v1.utils import get_contents_from_bundle
from superset.constants import MODEL_API_RW_METHOD_PERMISSION_MAP, RouteMethod
Expand Down Expand Up @@ -63,6 +63,8 @@
TableMetadataResponseSchema,
)
from superset.databases.utils import get_table_metadata
from superset.db_engine_specs import get_available_engine_specs
from superset.db_engine_specs.base import BaseParametersMixin
from superset.extensions import security_manager
from superset.models.core import Database
from superset.typing import FlaskResponse
Expand All @@ -84,6 +86,7 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
"test_connection",
"related_objects",
"function_names",
"available",
}
resource_name = "database"
class_permission_name = "Database"
Expand Down Expand Up @@ -821,7 +824,6 @@ def function_names(self, pk: int) -> Response:
schema:
type: integer
responses:
200:
200:
description: Query result
content:
Expand All @@ -839,3 +841,67 @@ def function_names(self, pk: int) -> Response:
if not database:
return self.response_404()
return self.response(200, function_names=database.function_names,)

@expose("/available/", methods=["GET"])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My initial thought is that this endpoint refers to a status of databases and not a new resource. Should we make this endpoint more restful and if so, how about something like GET /databases?status=available. On the other hand, I realize that we already have a lot of other existing routes that aren't particularly restful either, so then this route would be an outlier.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have a strong opinion about this... @dpgaspar any thoughts here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the current pattern, i'd rather it be separate endpoint vs. trying to hotwire it into the database endpoint. It is cleaner for us to have endpoint -> command this will also help with error handling in the future.

Would love to hear what @dpgaspar has to say

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like we can leave this as is for consistency.

@protect()
@statsd_metrics
@event_logger.log_this_with_context(
action=lambda self, *args, **kwargs: f"{self.__class__.__name__}" f".available",
log_to_statsd=False,
)
def available(self) -> Response:
"""Return names of databases currently available
---
get:
description:
Get names of databases currently available
responses:
200:
description: Database names
content:
application/json:
schema:
type: array
items:
type: object
properties:
name:
description: Name of the database
type: string
preferred:
description: Is the database preferred?
type: bool
sqlalchemy_uri_placeholder:
description: Example placeholder for the SQLAlchemy URI
type: string
parameters:
description: JSON schema defining the needed parameters
400:
$ref: '#/components/responses/400'
500:
$ref: '#/components/responses/500'
"""
preferred_databases: List[str] = app.config.get("PREFERRED_DATABASES", [])
available_databases = []
for engine_spec in get_available_engine_specs():
payload: Dict[str, Any] = {
"name": engine_spec.engine_name,
"engine": engine_spec.engine,
"preferred": engine_spec.engine in preferred_databases,
}

if issubclass(engine_spec, BaseParametersMixin):
payload["parameters"] = engine_spec.parameters_json_schema()
payload[
"sqlalchemy_uri_placeholder"
] = engine_spec.sqlalchemy_uri_placeholder

available_databases.append(payload)

available_databases.sort(
key=lambda payload: preferred_databases.index(payload["engine"])
if payload["engine"] in preferred_databases
else len(preferred_databases)
)

return self.response(200, databases=available_databases)
78 changes: 71 additions & 7 deletions superset/databases/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@

from flask import current_app
from flask_babel import lazy_gettext as _
from marshmallow import fields, Schema, validates_schema
from marshmallow import fields, pre_load, Schema, validates_schema
from marshmallow.validate import Length, ValidationError
from sqlalchemy import MetaData
from sqlalchemy.engine.url import make_url
from sqlalchemy.exc import ArgumentError

from superset.db_engine_specs import get_engine_specs
from superset.db_engine_specs.base import BaseParametersMixin
from superset.exceptions import CertificateException, SupersetSecurityException
from superset.models.core import PASSWORD_MASK
from superset.security.analytics_db_safety import check_sqlalchemy_uri
Expand Down Expand Up @@ -207,7 +209,72 @@ def extra_validator(value: str) -> str:
return value


class DatabasePostSchema(Schema):
class DatabaseParametersSchemaMixin:
"""
Allow SQLAlchemy URI to be passed as separate parameters.

This mixing is a first step in allowing the users to test, create and
edit databases without having to know how to write a SQLAlchemy URI.
Instead, each databases defines the parameters that it takes (eg,
username, password, host, etc.) and the SQLAlchemy URI is built from
these parameters.

When using this mixin make sure that `sqlalchemy_uri` is not required.
"""

parameters = fields.Dict(
keys=fields.Str(),
values=fields.Raw(),
description="DB-specific parameters for configuration",
)

# pylint: disable=no-self-use, unused-argument
@pre_load
def build_sqlalchemy_uri(
self, data: Dict[str, Any], **kwargs: Any
) -> Dict[str, Any]:
"""
Build SQLAlchemy URI from separate parameters.

This is used for databases that support being configured by individual
parameters (eg, username, password, host, etc.), instead of requiring
the constructed SQLAlchemy URI to be passed.
"""
parameters = data.pop("parameters", None)
if parameters:
if "engine" not in parameters:
raise ValidationError(
[
_(
"An engine must be specified when passing "
"individual parameters to a database."
)
]
)
engine = parameters["engine"]

engine_specs = get_engine_specs()
if engine not in engine_specs:
raise ValidationError(
[_('Engine "%(engine)s" is not a valid engine.', engine=engine,)]
)
engine_spec = engine_specs[engine]
if not issubclass(engine_spec, BaseParametersMixin):
raise ValidationError(
[
_(
'Engine spec "%(engine_spec)s" does not support '
"being configured via individual parameters.",
engine_spec=engine_spec.__name__,
)
]
)

data["sqlalchemy_uri"] = engine_spec.build_sqlalchemy_url(parameters)
return data


class DatabasePostSchema(Schema, DatabaseParametersSchemaMixin):
database_name = fields.String(
description=database_name_description, required=True, validate=Length(1, 250),
)
Expand Down Expand Up @@ -242,12 +309,11 @@ class DatabasePostSchema(Schema):
)
sqlalchemy_uri = fields.String(
description=sqlalchemy_uri_description,
required=True,
validate=[Length(1, 1024), sqlalchemy_uri_validator],
)


class DatabasePutSchema(Schema):
class DatabasePutSchema(Schema, DatabaseParametersSchemaMixin):
database_name = fields.String(
description=database_name_description, allow_none=True, validate=Length(1, 250),
)
Expand Down Expand Up @@ -282,12 +348,11 @@ class DatabasePutSchema(Schema):
)
sqlalchemy_uri = fields.String(
description=sqlalchemy_uri_description,
allow_none=True,
validate=[Length(0, 1024), sqlalchemy_uri_validator],
)


class DatabaseTestConnectionSchema(Schema):
class DatabaseTestConnectionSchema(Schema, DatabaseParametersSchemaMixin):
database_name = fields.String(
description=database_name_description, allow_none=True, validate=Length(1, 250),
)
Expand All @@ -305,7 +370,6 @@ class DatabaseTestConnectionSchema(Schema):
)
sqlalchemy_uri = fields.String(
description=sqlalchemy_uri_description,
required=True,
validate=[Length(1, 1024), sqlalchemy_uri_validator],
)

Expand Down
25 changes: 23 additions & 2 deletions superset/db_engine_specs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@
import pkgutil
from importlib import import_module
from pathlib import Path
from typing import Any, Dict, List, Type
from typing import Any, Dict, List, Set, Type

import sqlalchemy.databases
from pkg_resources import iter_entry_points

from superset.db_engine_specs.base import BaseEngineSpec
Expand Down Expand Up @@ -67,7 +68,7 @@ def get_engine_specs() -> Dict[str, Type[BaseEngineSpec]]:
try:
engine_spec = ep.load()
except Exception: # pylint: disable=broad-except
logger.warning("Unable to load engine spec: %s", engine_spec)
logger.warning("Unable to load Superset DB engine spec: %s", engine_spec)
continue
engine_specs.append(engine_spec)

Expand All @@ -82,3 +83,23 @@ def get_engine_specs() -> Dict[str, Type[BaseEngineSpec]]:
engine_specs_map[name] = engine_spec

return engine_specs_map


def get_available_engine_specs() -> List[Type[BaseEngineSpec]]:
# native SQLAlchemy dialects
backends: Set[str] = {
getattr(sqlalchemy.databases, attr).dialect.name
for attr in sqlalchemy.databases.__all__
}

# installed 3rd-party dialects
for ep in iter_entry_points("sqlalchemy.dialects"):
try:
dialect = ep.load()
except Exception: # pylint: disable=broad-except
logger.warning("Unable to load SQLAlchemy dialect: %s", dialect)
else:
backends.add(dialect.name)

engine_specs = get_engine_specs()
return [engine_specs[backend] for backend in backends if backend in engine_specs]
Loading