From 93b4bb04c887c5a26b3370c8d132c72d9fc38ae4 Mon Sep 17 00:00:00 2001
From: "Eric O. Korman"
Date: Mon, 5 Aug 2024 09:15:02 -0500
Subject: [PATCH] don't allow commas in model and dataset names (#700)

---
Notes:
    What this patch does
      * schemas.Dataset and schemas.Model gain a "name" field validator that
        raises ValueError when the name contains a comma.
      * Migration 00000013 rewrites existing names: every comma becomes an
        underscore, and further underscores are appended while the candidate
        name is already present in the same table. The renamed value is then
        propagated to Evaluation.model_name / Evaluation.dataset_names.
      * The down migration is an empty file.

    Review changes relative to the original submission
      * The new tests match the message against str(exc_info.value) (the
        raised exception) instead of str(exc_info) (pytest's ExceptionInfo
        wrapper, whose text is an abbreviated repr).
      * The validators are spelled validate_name_no_commas
        (was validate_name_no_commans).
      * Line structure and indentation were restored from a whitespace-
        collapsed copy; context-line whitespace is reconstructed and should
        be confirmed against the target tree.
      * The blob ids on the "index" lines still name the original content,
        and the From: header in this copy carries no e-mail address.

 api/tests/unit-tests/schemas/test_core.py     |  8 +++
 api/valor_api/schemas/types.py                | 16 +++++
 .../sql/00000013_disallow_commas.down.sql     |  0
 .../sql/00000013_disallow_commas.up.sql       | 63 +++++++++++++++++++
 4 files changed, 87 insertions(+)
 create mode 100644 migrations/sql/00000013_disallow_commas.down.sql
 create mode 100644 migrations/sql/00000013_disallow_commas.up.sql

diff --git a/api/tests/unit-tests/schemas/test_core.py b/api/tests/unit-tests/schemas/test_core.py
index 08d388206..f5b75bf94 100644
--- a/api/tests/unit-tests/schemas/test_core.py
+++ b/api/tests/unit-tests/schemas/test_core.py
@@ -66,6 +66,10 @@ def test_dataset(metadata):
             metadata=[{123: 12434}, "123"],  # type: ignore - purposefully throwing error
         )
 
+    with pytest.raises(ValidationError) as exc_info:
+        schemas.Dataset(name="name,with,commas")
+    assert "cannot contain commas" in str(exc_info.value)
+
 
 def test_model(metadata):
     # valid
@@ -111,6 +115,10 @@ def test_model(metadata):
             metadata=[{123: 12434}, "123"],  # type: ignore - purposefully throwing error
         )
 
+    with pytest.raises(ValidationError) as exc_info:
+        schemas.Model(name="name,with,commas")
+    assert "cannot contain commas" in str(exc_info.value)
+
 
 def test_datum(metadata):
     # valid
diff --git a/api/valor_api/schemas/types.py b/api/valor_api/schemas/types.py
index a4b569968..96d2cc61b 100644
--- a/api/valor_api/schemas/types.py
+++ b/api/valor_api/schemas/types.py
@@ -551,6 +551,14 @@ def validate_metadata_values(cls, v: dict) -> dict:
         validate_metadata(v)
         return v
 
+    @field_validator("name")
+    @classmethod
+    def validate_name_no_commas(cls, v: str) -> str:
+        """Validates the 'name' field has no commas in it."""
+        if "," in v:
+            raise ValueError("Dataset names cannot contain commas.")
+        return v
+
 
 class Model(BaseModel):
     """
@@ -575,6 +583,14 @@ def validate_name(cls, v: str) -> str:
         validate_type_string(v)
         return v
 
+    @field_validator("name")
+    @classmethod
+    def validate_name_no_commas(cls, v: str) -> str:
+        """Validates the 'name' field has no commas in it."""
+        if "," in v:
+            raise ValueError("Model names cannot contain commas.")
+        return v
+
     @field_validator("metadata")
     @classmethod
     def validate_metadata_values(cls, v: dict) -> dict:
diff --git a/migrations/sql/00000013_disallow_commas.down.sql b/migrations/sql/00000013_disallow_commas.down.sql
new file mode 100644
index 000000000..e69de29bb
diff --git a/migrations/sql/00000013_disallow_commas.up.sql b/migrations/sql/00000013_disallow_commas.up.sql
new file mode 100644
index 000000000..a647ca161
--- /dev/null
+++ b/migrations/sql/00000013_disallow_commas.up.sql
@@ -0,0 +1,63 @@
+-- this migration goes through dataset names and model names and replaces commas with underscores
+-- if the resulting name happens to exist, it adds underscores until it gets a name that doesn't
+-- this will update the following tables: Model, Dataset, Evaluation
+
+-- Function to get a unique name by adding underscores
+CREATE OR REPLACE FUNCTION get_unique_name(base_name TEXT, table_name TEXT)
+RETURNS TEXT AS $$
+DECLARE
+    unique_name TEXT := base_name;
+    name_exists INT;
+BEGIN
+    EXECUTE format('SELECT COUNT(*) FROM %I WHERE name = $1', table_name) INTO name_exists USING unique_name;
+
+    WHILE name_exists > 0 LOOP
+        unique_name := unique_name || '_';
+        EXECUTE format('SELECT COUNT(*) FROM %I WHERE name = $1', table_name) INTO name_exists USING unique_name;
+    END LOOP;
+
+    RETURN unique_name;
+END;
+$$ LANGUAGE plpgsql;
+
+DO $$
+DECLARE
+    old_name TEXT;
+    new_name TEXT;
+BEGIN
+    FOR old_name IN SELECT name FROM model WHERE POSITION(',' IN name) > 0 LOOP
+        new_name := get_unique_name(REPLACE(old_name, ',', '_'), 'model');
+
+        UPDATE Model SET name = new_name WHERE name = old_name;
+
+        UPDATE Evaluation SET model_name = new_name WHERE model_name = old_name;
+    END LOOP;
+END;
+$$;
+
+DO $$
+DECLARE
+    old_name TEXT;
+    new_name TEXT;
+BEGIN
+    FOR old_name IN SELECT name FROM Dataset WHERE POSITION(',' IN name) > 0 LOOP
+        new_name := get_unique_name(REPLACE(old_name, ',', '_'), 'dataset');
+
+        UPDATE Dataset SET name = new_name WHERE name = old_name;
+
+        UPDATE Evaluation
+        SET dataset_names = (
+            SELECT jsonb_agg(
+                CASE
+                    WHEN elem = old_name THEN new_name
+                    ELSE elem
+                END
+            )
+            FROM jsonb_array_elements_text(dataset_names) AS elem
+        )
+        WHERE dataset_names @> jsonb_build_array(old_name);
+    END LOOP;
+END;
+$$;
+
+DROP FUNCTION get_unique_name;
\ No newline at end of file