Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: TreeMap migration #20346

Merged
merged 3 commits into from
Jul 7, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""adding_advanced_data_type.py
"""adding advanced data type to column models
Revision ID: 6f139c533bea
Revises: cbe71abde154
Create Date: 2021-05-27 16:10:59.567684
Comment on lines -17 to +22
Copy link
Member Author

@zhaoyongjie zhaoyongjie Jun 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bycatch: pretty output

image

"""

import sqlalchemy as sa
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Migrating legacy TreeMap

Revision ID: c747c78868b6
Revises: e786798587de
Create Date: 2022-06-30 22:04:17.686635

"""

# revision identifiers, used by Alembic.

revision = "c747c78868b6"
down_revision = "7fb8bca906d2"

from alembic import op
from sqlalchemy import and_, Column, Integer, String, Text
from sqlalchemy.ext.declarative import declarative_base

from superset import db
from superset.utils.migrate_viz import get_migrate_class, MigrateVizEnum

treemap_processor = get_migrate_class[MigrateVizEnum.treemap]

Base = declarative_base()


class Slice(Base):
__tablename__ = "slices"

id = Column(Integer, primary_key=True)
slice_name = Column(String(250))
viz_type = Column(String(250))
params = Column(Text)
query_context = Column(Text)


def upgrade():
bind = op.get_bind()
session = db.Session(bind=bind)

slices = session.query(Slice).filter(
Slice.viz_type == treemap_processor.source_viz_type
)
total = slices.count()
idx = 0
for slc in slices.yield_per(100):
try:
idx += 1
print(f"Upgrading ({idx}/{total}): {slc.slice_name}#{slc.id}")
new_viz = treemap_processor.upgrade(slc)
session.merge(new_viz)
session.commit()
zhaoyongjie marked this conversation as resolved.
Show resolved Hide resolved
except Exception as exc:
print(
"Error while processing migration: '{}'\nError: {}\n".format(
slc.slice_name, str(exc)
)
)

session.close()


def downgrade():
bind = op.get_bind()
session = db.Session(bind=bind)

slices = session.query(Slice).filter(
and_(
Slice.viz_type == treemap_processor.target_viz_type,
Slice.params.like("%form_data_bak%"),
)
)
total = slices.count()
idx = 0
for slc in slices.yield_per(100):
try:
idx += 1
print(f"Downgrading ({idx}/{total}): {slc.slice_name}#{slc.id}")
new_viz = treemap_processor.downgrade(slc)
session.merge(new_viz)
session.commit()
except Exception as exc:
print(
"Error while processing migration: '{}'\nError: {}\n".format(
slc.slice_name, str(exc)
)
)

session.close()
125 changes: 125 additions & 0 deletions superset/utils/migrate_viz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

import json
from enum import Enum
from typing import Dict, Set, Type, TYPE_CHECKING

if TYPE_CHECKING:
from superset.models.slice import Slice


# pylint: disable=invalid-name
class MigrateVizEnum(str, Enum):
# the Enum member name is viz_type in database
treemap = "treemap"


class MigrateViz:
remove_keys: Set[str] = set()

zhaoyongjie marked this conversation as resolved.
Show resolved Hide resolved
mapping_keys: Dict[str, str] = {}

zhaoyongjie marked this conversation as resolved.
Show resolved Hide resolved
source_viz_type: str

zhaoyongjie marked this conversation as resolved.
Show resolved Hide resolved
target_viz_type: str

def __init__(self, form_data: str) -> None:
self.data = json.loads(form_data)

def _pre_action(self) -> None:
"""some actions before migrate"""

def _migrate(self) -> None:
if self.data.get("viz_type") != self.source_viz_type:
return

if "viz_type" in self.data:
self.data["viz_type"] = self.target_viz_type

rv_data = {}
for (key, value) in self.data.items():
zhaoyongjie marked this conversation as resolved.
Show resolved Hide resolved
if key in self.mapping_keys and self.mapping_keys[key] in rv_data:
raise ValueError("Duplicate key in target viz")

if key in self.mapping_keys:
rv_data[self.mapping_keys[key]] = value
Comment on lines +54 to +58
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I personally find this more readable, but it's not a blocker.

Suggested change
if key in self.mapping_keys and self.mapping_keys[key] in rv_data:
raise ValueError("Duplicate key in target viz")
if key in self.mapping_keys:
rv_data[self.mapping_keys[key]] = value
if key in self.mapping_keys:
mapped_key = self.mapping_keys[key]
if mapped_key in rv_data:
raise ValueError("Duplicate key in target viz")
rv_data[mapped_key] = value

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exception handling is easier to maintain if it can be put together.


if key in self.remove_keys:
continue

rv_data[key] = value

self.data = rv_data

def _post_action(self) -> None:
"""some actions after migrate"""

@classmethod
def upgrade(cls, slc: Slice) -> Slice:
clz = cls(slc.params)
slc.viz_type = cls.target_viz_type
form_data_bak = clz.data.copy()

clz._pre_action()
clz._migrate()
clz._post_action()

# only backup params
slc.params = json.dumps({**clz.data, "form_data_bak": form_data_bak})

query_context = json.loads(slc.query_context or "{}")
if "form_data" in query_context:
query_context["form_data"] = clz.data
slc.query_context = json.dumps(query_context)
return slc

@classmethod
def downgrade(cls, slc: Slice) -> Slice:
form_data = json.loads(slc.params)
if "form_data_bak" in form_data and "viz_type" in form_data.get(
"form_data_bak"
):
form_data_bak = form_data["form_data_bak"]
slc.params = json.dumps(form_data_bak)
slc.viz_type = form_data_bak.get("viz_type")

query_context = json.loads(slc.query_context or "{}")
if "form_data" in query_context:
query_context["form_data"] = form_data_bak
slc.query_context = json.dumps(query_context)
return slc


class MigrateTreeMap(MigrateViz):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe move MigrateTreeMap to its own file? I guess we'll have a bunch of these.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a one-time script. There is no need to split moudles.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't we have other viz types to be migrated? If yes, are you going to add all the viz types to this file?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, all migrations are placed in this script.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generally, scripts are waterfalls, while libary should be modular.

source_viz_type = "treemap"
target_viz_type = "treemap_v2"
remove_keys = {"metrics"}

def _pre_action(self) -> None:
if (
"metrics" in self.data
and isinstance(self.data["metrics"], list)
and len(self.data["metrics"]) > 0
):
self.data["metric"] = self.data["metrics"][0]


get_migrate_class: Dict[MigrateVizEnum, Type[MigrateViz]] = {
MigrateVizEnum.treemap: MigrateTreeMap,
}
5 changes: 4 additions & 1 deletion tests/unit_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# pylint: disable=redefined-outer-name, import-outside-toplevel

import importlib
import os
from typing import Any, Callable, Iterator

import pytest
Expand Down Expand Up @@ -69,7 +70,9 @@ def app() -> Iterator[SupersetApp]:
app = SupersetApp(__name__)

app.config.from_object("superset.config")
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite://"
app.config["SQLALCHEMY_DATABASE_URI"] = (
os.environ.get("SUPERSET__SQLALCHEMY_DATABASE_URI") or "sqlite://"
)
Comment on lines -72 to +75
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use integration test database instance for local debugging.

app.config["WTF_CSRF_ENABLED"] = False
app.config["PREVENT_UNSAFE_DB_CONNECTIONS"] = False
app.config["TESTING"] = True
Expand Down
93 changes: 93 additions & 0 deletions tests/unit_tests/utils/viz_migration/treemap_migration_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Licensed to the Apache Software Foundation (ASF) under one
zhaoyongjie marked this conversation as resolved.
Show resolved Hide resolved
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import json

from superset.app import SupersetApp
from superset.utils.migrate_viz import get_migrate_class, MigrateVizEnum

treemap_form_data = """{
"adhoc_filters": [
{
"clause": "WHERE",
"comparator": [
"Edward"
],
"expressionType": "SIMPLE",
"filterOptionName": "filter_xhbus6irfa_r10k9nwmwy",
"isExtra": false,
"isNew": false,
"operator": "IN",
"operatorId": "IN",
"sqlExpression": null,
"subject": "name"
}
],
"color_scheme": "bnbColors",
"datasource": "2__table",
"extra_form_data": {},
"granularity_sqla": "ds",
"groupby": [
"state",
"gender"
],
"metrics": [
"sum__num"
],
"number_format": ",d",
"order_desc": true,
"row_limit": 10,
"time_range": "No filter",
"timeseries_limit_metric": "sum__num",
"treemap_ratio": 1.618033988749895,
"viz_type": "treemap"
}
"""

treemap_processor = get_migrate_class[MigrateVizEnum.treemap]


def test_treemap_migrate(app_context: SupersetApp) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe have separated tests for upgrade and downgrade?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the method of downgrade needs an updated slice so combine upgrade and downgrade in the same unit test case.

from superset.models.slice import Slice

slc = Slice(
viz_type="treemap",
datasource_type="table",
params=treemap_form_data,
query_context=f'{{"form_data": {treemap_form_data}}}',
)

slc = treemap_processor.upgrade(slc)
assert slc.viz_type == treemap_processor.target_viz_type
# verify form_data
new_form_data = json.loads(slc.params)
assert new_form_data["metric"] == "sum__num"
assert new_form_data["viz_type"] == "treemap_v2"
assert "metrics" not in new_form_data
assert json.dumps(new_form_data["form_data_bak"], sort_keys=True) == json.dumps(
json.loads(treemap_form_data), sort_keys=True
)

# verify query_context
new_query_context = json.loads(slc.query_context)
assert new_query_context["form_data"]["viz_type"] == "treemap_v2"

# downgrade
slc = treemap_processor.downgrade(slc)
assert slc.viz_type == treemap_processor.source_viz_type
assert json.dumps(json.loads(slc.params), sort_keys=True) == json.dumps(
json.loads(treemap_form_data), sort_keys=True
)