Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Recreate indexes when calling transform when possible and raise an er… #634

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions sqlite_utils/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -1967,6 +1967,28 @@ def transform_sql(
sqls.append(
"ALTER TABLE [{}] RENAME TO [{}];".format(new_table_name, self.name)
)
# Re-add existing indexes
for index in self.indexes:
if index.origin not in ("pk"):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed this to 'index.origin != "pk"` instead.

index_sql = self.db.execute(
"""SELECT sql FROM sqlite_master WHERE type = 'index' AND name = :index_name;""",
{"index_name": index.name},
).fetchall()[0][0]
assert index_sql is not None, (
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with assert for this kind of thing is that these won't execute if Python is run in optimize mode: https://docs.python.org/3/using/cmdline.html#cmdoption-O

I don't personally like that feature, but I think we should switch to a different exception just in case.

f"Index '{index}' on table '{self.name}' does not have a "
"CREATE INDEX statement. You must manually drop this index prior to running this "
"transformation and manually recreate the new index after running this transformation."
)
if keep_table:
sqls.append(f"DROP INDEX IF EXISTS [{index.name}];")
for col in index.columns:
assert col not in rename.keys() and col not in drop, (
f"Index '{index.name}' column '{col}' is not in updated table '{self.name}'. "
f"You must manually drop this index prior to running this transformation "
f"and manually recreate the new index after running this transformation. "
f"The original index sql statement is: `{index_sql}`. No changes have been applied to this table."
)
sqls.append(index_sql)
return sqls

def extract(
Expand Down
91 changes: 91 additions & 0 deletions tests/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,3 +539,94 @@ def test_transform_strict(fresh_db, strict):
assert dogs.strict == strict or not fresh_db.supports_strict
dogs.transform(not_null={"name"})
assert dogs.strict == strict or not fresh_db.supports_strict


@pytest.mark.parametrize(
"indexes, transform_params",
[
([["name"]], {"types": {"age": str}}),
([["name"], ["age", "breed"]], {"types": {"age": str}}),
([], {"types": {"age": str}}),
([["name"]], {"types": {"age": str}, "keep_table": "old_dogs"}),
],
)
def test_transform_indexes(fresh_db, indexes, transform_params):
dogs = fresh_db["dogs"]
dogs.insert({"id": 1, "name": "Cleo", "age": 5, "breed": "Labrador"}, pk="id")

for index in indexes:
dogs.create_index(index)

indexes_before_transform = dogs.indexes

dogs.transform(**transform_params)

assert sorted(
[
{k: v for k, v in idx._asdict().items() if k != "seq"}
for idx in dogs.indexes
],
key=lambda x: x["name"],
) == sorted(
[
{k: v for k, v in idx._asdict().items() if k != "seq"}
for idx in indexes_before_transform
],
key=lambda x: x["name"],
), f"Indexes before transform: {indexes_before_transform}\nIndexes after transform: {dogs.indexes}"
if "keep_table" in transform_params:
assert all(
index.origin == "pk"
for index in fresh_db[transform_params["keep_table"]].indexes
)


def test_transform_retains_indexes_with_foreign_keys(fresh_db):
dogs = fresh_db["dogs"]
owners = fresh_db["owners"]

dogs.insert({"id": 1, "name": "Cleo", "owner_id": 1}, pk="id")
owners.insert({"id": 1, "name": "Alice"}, pk="id")

dogs.create_index(["name"])

indexes_before_transform = dogs.indexes

fresh_db.add_foreign_keys([("dogs", "owner_id", "owners", "id")]) # calls transform

assert sorted(
[
{k: v for k, v in idx._asdict().items() if k != "seq"}
for idx in dogs.indexes
],
key=lambda x: x["name"],
) == sorted(
[
{k: v for k, v in idx._asdict().items() if k != "seq"}
for idx in indexes_before_transform
],
key=lambda x: x["name"],
), f"Indexes before transform: {indexes_before_transform}\nIndexes after transform: {dogs.indexes}"


@pytest.mark.parametrize(
"transform_params",
[
{"rename": {"age": "dog_age"}},
{"drop": ["age"]},
],
)
def test_transform_with_indexes_errors(fresh_db, transform_params):
dogs = fresh_db["dogs"]
dogs.insert({"id": 1, "name": "Cleo", "age": 5}, pk="id")

dogs.create_index(["name", "age"])

with pytest.raises(AssertionError) as excinfo:
dogs.transform(**transform_params)

assert (
"Index 'idx_dogs_name_age' column 'age' is not in updated table 'dogs'. "
"You must manually drop this index prior to running this transformation"
in str(excinfo.value)
)
Loading