diff --git a/git_history/cli.py b/git_history/cli.py index 5574022..1c05729 100644 --- a/git_history/cli.py +++ b/git_history/cli.py @@ -5,6 +5,7 @@ import sqlite_utils import textwrap from pathlib import Path +from .utils import fix_reserved_columns def iterate_file_versions( @@ -181,17 +182,13 @@ def file( # If --id is specified, do things a bit differently if ids: - # If '--id id' is only option, 'id' is not a reserved column - id_is_reserved = list(ids) != ["id"] # Any ids that are reserved columns must be renamed fixed_ids = set( fix_reserved_columns( {id: 1 for id in ids}, - allow_id=not id_is_reserved, - allow_banned=True, ).keys() ) - items_insert_extra_kwargs["pk"] = "id" + items_insert_extra_kwargs["pk"] = "_id" # Check all items have those columns _ids_set = set(ids) bad_items = [ @@ -209,7 +206,7 @@ def file( items_insert_extra_kwargs["replace"] = True # Which of these are new versions of things we have seen before for item in items: - item = fix_reserved_columns(item, allow_id=not id_is_reserved) + item = fix_reserved_columns(item) item_id = _hash(dict((id, item.get(id)) for id in fixed_ids)) if item_id in item_ids_in_this_version: if not ignore_duplicate_ids: @@ -240,9 +237,9 @@ def file( id_last_hash[item_id] = item_hash version = id_versions.get(item_id, 0) + 1 id_versions[item_id] = version - items_to_add.append(dict(item, id=item_id)) + items_to_add.append(dict(item, _id=item_id)) versions.append( - dict(item, item=item_id, version=version, commit=git_hash) + dict(item, _item=item_id, _version=version, _commit=git_hash) ) # Only add the items that had no new version @@ -252,15 +249,17 @@ def file( # not ids - so just check them for banned columns and add the item["commit"] for item in items: item = fix_reserved_columns(item) - item["commit"] = git_hash + item["_commit"] = git_hash # In this case item table needs a foreign key on 'commit' - items_insert_extra_kwargs["foreign_keys"] = (("commit", "commits", "hash"),) + items_insert_extra_kwargs["foreign_keys"] = ( + ("_commit", "commits", "hash"), + ) # insert items if items: db["items"].insert_all( items, - column_order=("id",), + column_order=("_id",), alter=True, **items_insert_extra_kwargs, ) @@ -269,11 +268,14 @@ def file( if versions: db["item_versions"].insert_all( versions, - pk=("item", "version"), + pk=("_item", "_version"), alter=True, replace=True, - column_order=("item", "version", "commit"), - foreign_keys=(("item", "items", "id"), ("commit", "commits", "hash")), + column_order=("_item", "_version", "_commit"), + foreign_keys=( + ("_item", "items", "_id"), + ("_commit", "commits", "hash"), + ), ) @@ -283,27 +285,3 @@ def _hash(record): "utf8" ) ).hexdigest() - - -def fix_reserved_columns(item, allow_id=False, allow_banned=False): - reserved = {"item", "version", "commit", "rowid"} - banned = {"id_", "item_", "version_", "commit_"} - if not allow_id: - reserved.add("id") - if not allow_banned and any(key in banned for key in item): - raise click.ClickException( - "Column {} is one of these banned columns: {}\n{}".format( - sorted([key for key in item if key in banned]), - sorted(banned), - json.dumps(item, indent=4, default=str), - ) - ) - if not any(key in reserved for key in item): - return item - new_item = {} - for key in item: - if key in reserved: - new_item[key + "_"] = item[key] - else: - new_item[key] = item[key] - return new_item diff --git a/tests/test_git_history.py b/tests/test_git_history.py index d0183d7..8df6153 100644 --- a/tests/test_git_history.py +++ b/tests/test_git_history.py @@ -29,17 +29,17 @@ def repo(tmpdir): json.dumps( [ { - "id": 1, - "item": "Gin", - "version": "v1", - "commit": "commit1", + "_id": 1, + "_item": "Gin", + "_version": "v1", + "_commit": "commit1", "rowid": 5, }, { - "id": 2, - "item": "Tonic", - "version": "v1", - "commit": "commit1", + "_id": 2, + "_item": "Tonic", + "_version": "v1", + "_commit": "commit1", "rowid": 6, }, ] @@ -50,8 +50,8 @@ def repo(tmpdir): json.dumps( [ { - "id_": 1, - "version_": "Gin", + "_id_": 1, + "_version_": "Gin", } ] ), @@ -110,9 +110,27 @@ def repo(tmpdir): (repo_dir / "items-with-reserved-columns.json").write_text( json.dumps( [ - {"id": 1, "item": "Gin", "version": "v1", "commit": "commit1"}, - {"id": 2, "item": "Tonic 2", "version": "v1", "commit": "commit1"}, - {"id": 3, "item": "Rum", "version": "v1", "commit": "commit1"}, + { + "_id": 1, + "_item": "Gin", + "_version": "v1", + "_commit": "commit1", + "rowid": 5, + }, + { + "_id": 2, + "_item": "Tonic 2", + "_version": "v1", + "_commit": "commit1", + "rowid": 6, + }, + { + "_id": 3, + "_item": "Rum", + "_version": "v1", + "_commit": "commit1", + "rowid": 7, + }, ] ), "utf-8", @@ -138,7 +156,7 @@ def test_file_without_id(repo, tmpdir): "CREATE TABLE [items] (\n" " [item_id] INTEGER,\n" " [name] TEXT,\n" - " [commit] TEXT REFERENCES [commits]([hash])\n" + " [_commit] TEXT REFERENCES [commits]([hash])\n" ");" ) assert db["commits"].count == 2 @@ -176,29 +194,29 @@ def test_file_with_id(repo, tmpdir): " [commit_at] TEXT\n" ");\n" "CREATE TABLE [items] (\n" - " [id] TEXT PRIMARY KEY,\n" + " [_id] TEXT PRIMARY KEY,\n" " [item_id] INTEGER,\n" " [name] TEXT\n" ");\n" "CREATE TABLE [item_versions] (\n" - " [item] TEXT REFERENCES [items]([id]),\n" - " [version] INTEGER,\n" - " [commit] TEXT REFERENCES [commits]([hash]),\n" + " [_item] TEXT REFERENCES [items]([_id]),\n" + " [_version] INTEGER,\n" + " [_commit] TEXT REFERENCES [commits]([hash]),\n" " [item_id] INTEGER,\n" " [name] TEXT,\n" - " PRIMARY KEY ([item], [version])\n" + " PRIMARY KEY ([_item], [_version])\n" ");" ) assert db["commits"].count == 2 # Should have no duplicates item_versions = [ - r for r in db.query("select item_id, version, name from item_versions") + r for r in db.query("select item_id, _version, name from item_versions") ] assert item_versions == [ - {"item_id": 1, "version": 1, "name": "Gin"}, - {"item_id": 2, "version": 1, "name": "Tonic"}, - {"item_id": 2, "version": 2, "name": "Tonic 2"}, - {"item_id": 3, "version": 1, "name": "Rum"}, + {"item_id": 1, "_version": 1, "name": "Gin"}, + {"item_id": 2, "_version": 1, "name": "Tonic"}, + {"item_id": 2, "_version": 2, "name": "Tonic 2"}, + {"item_id": 3, "_version": 1, "name": "Rum"}, ] @@ -215,7 +233,7 @@ def test_file_with_reserved_columns(repo, tmpdir): "--repo", str(repo), "--id", - "id", + "_id", ], catch_exceptions=False, ) @@ -227,113 +245,63 @@ def test_file_with_reserved_columns(repo, tmpdir): " [commit_at] TEXT\n" ");\n" "CREATE TABLE [items] (\n" - " [id] TEXT PRIMARY KEY,\n" - " [item_] TEXT,\n" - " [version_] TEXT,\n" - " [commit_] TEXT,\n" + " [_id] TEXT PRIMARY KEY,\n" + " [_id_] INTEGER,\n" + " [_item_] TEXT,\n" + " [_version_] TEXT,\n" + " [_commit_] TEXT,\n" " [rowid_] INTEGER\n" ");\n" "CREATE TABLE [item_versions] (\n" - " [item] TEXT REFERENCES [items]([id]),\n" - " [version] INTEGER,\n" - " [commit] TEXT REFERENCES [commits]([hash]),\n" - " [id] INTEGER,\n" - " [item_] TEXT,\n" - " [version_] TEXT,\n" - " [commit_] TEXT,\n" + " [_item] TEXT REFERENCES [items]([_id]),\n" + " [_version] INTEGER,\n" + " [_commit] TEXT REFERENCES [commits]([hash]),\n" + " [_id_] INTEGER,\n" + " [_item_] TEXT,\n" + " [_version_] TEXT,\n" + " [_commit_] TEXT,\n" " [rowid_] INTEGER,\n" - " PRIMARY KEY ([item], [version])\n" + " PRIMARY KEY ([_item], [_version])\n" ");" ) item_versions = [ - r for r in db.query("select id, item_, version_, commit_ from item_versions") + r + for r in db.query( + "select _id_, _item_, _version_, _commit_, rowid_ from item_versions" + ) ] assert item_versions == [ - {"id": 1, "item_": "Gin", "version_": "v1", "commit_": "commit1"}, - {"id": 2, "item_": "Tonic", "version_": "v1", "commit_": "commit1"}, - {"id": 1, "item_": "Gin", "version_": "v1", "commit_": "commit1"}, - {"id": 2, "item_": "Tonic 2", "version_": "v1", "commit_": "commit1"}, - {"id": 3, "item_": "Rum", "version_": "v1", "commit_": "commit1"}, + { + "_id_": 1, + "_item_": "Gin", + "_version_": "v1", + "_commit_": "commit1", + "rowid_": 5, + }, + { + "_id_": 2, + "_item_": "Tonic", + "_version_": "v1", + "_commit_": "commit1", + "rowid_": 6, + }, + { + "_id_": 2, + "_item_": "Tonic 2", + "_version_": "v1", + "_commit_": "commit1", + "rowid_": 6, + }, + { + "_id_": 3, + "_item_": "Rum", + "_version_": "v1", + "_commit_": "commit1", + "rowid_": 7, + }, ] -def test_more_than_one_id_makes_id_reserved(repo, tmpdir): - # If we use "--id id --id version" then id is converted to id_ - # so we can add our own id_ that is a hash of those two columns - runner = CliRunner() - db_path = str(tmpdir / "db.db") - with runner.isolated_filesystem(): - result = runner.invoke( - cli, - [ - "file", - db_path, - str(repo / "items-with-reserved-columns.json"), - "--repo", - str(repo), - "--id", - "id", - "--id", - "version", - ], - catch_exceptions=False, - ) - assert result.exit_code == 0 - db = sqlite_utils.Database(db_path) - assert db.schema == ( - "CREATE TABLE [commits] (\n" - " [hash] TEXT PRIMARY KEY,\n" - " [commit_at] TEXT\n" - ");\n" - "CREATE TABLE [items] (\n" - " [id] TEXT PRIMARY KEY,\n" - " [id_] INTEGER,\n" - " [item_] TEXT,\n" - " [version_] TEXT,\n" - " [commit_] TEXT,\n" - " [rowid_] INTEGER\n" - ");\n" - "CREATE TABLE [item_versions] (\n" - " [item] TEXT REFERENCES [items]([id]),\n" - " [version] INTEGER,\n" - " [commit] TEXT REFERENCES [commits]([hash]),\n" - " [id_] INTEGER,\n" - " [item_] TEXT,\n" - " [version_] TEXT,\n" - " [commit_] TEXT,\n" - " [rowid_] INTEGER,\n" - " PRIMARY KEY ([item], [version])\n" - ");" - ) - - -@pytest.mark.parametrize("specify_id", (True, False)) -def test_file_with_banned_columns(repo, tmpdir, specify_id): - runner = CliRunner() - db_path = str(tmpdir / "db.db") - with runner.isolated_filesystem(): - result = runner.invoke( - cli, - [ - "file", - db_path, - str(repo / "items-with-banned-columns.json"), - "--repo", - str(repo), - ] - + (["--id", "id_"] if specify_id else []), - catch_exceptions=False, - ) - assert result.exit_code == 1 - assert result.output.strip() == ( - "Error: Column ['id_', 'version_'] is one of these banned columns: ['commit_', 'id_', 'item_', 'version_']\n" - "{\n" - ' "id_": 1,\n' - ' "version_": "Gin"\n' - "}" - ) - - @pytest.mark.parametrize("file", ("trees.csv", "trees.tsv")) def test_csv_tsv(repo, tmpdir, file): runner = CliRunner() @@ -361,16 +329,16 @@ def test_csv_tsv(repo, tmpdir, file): " [commit_at] TEXT\n" ");\n" "CREATE TABLE [items] (\n" - " [id] TEXT PRIMARY KEY,\n" + " [_id] TEXT PRIMARY KEY,\n" " [TreeID] TEXT,\n" " [name] TEXT\n" ");\n" "CREATE TABLE [item_versions] (\n" - " [item] TEXT REFERENCES [items]([id]),\n" - " [version] INTEGER,\n" - " [commit] TEXT REFERENCES [commits]([hash]),\n" + " [_item] TEXT REFERENCES [items]([_id]),\n" + " [_version] INTEGER,\n" + " [_commit] TEXT REFERENCES [commits]([hash]),\n" " [TreeID] TEXT,\n" " [name] TEXT,\n" - " PRIMARY KEY ([item], [version])\n" + " PRIMARY KEY ([_item], [_version])\n" ");" )