Skip to content

Commit

Permalink
Convert longer media varchar fields to text in the catalog db (#4357)
Browse files Browse the repository at this point in the history

* Convert longer media `varchar` fields to `text` in the catalog db

* Update doc generation to accept text columns
  • Loading branch information
AetherUnbound authored May 24, 2024
1 parent 6e8d0bf commit 4798866
Show file tree
Hide file tree
Showing 9 changed files with 70 additions and 69 deletions.
1 change: 1 addition & 0 deletions catalog/utilities/media_props_gen/helpers/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"jsonb",
"timestamp with time zone",
"character varying",
"text",
]
SQL_TYPE_REGEX = re.compile(f"({'|'.join(SQL_TYPES)})")
CREATE_TABLE_REGEX = re.compile(r"CREATE\s+TABLE\s+\w+\.(\w+)\s+\(([\s\S]*?)\);")
Expand Down
14 changes: 7 additions & 7 deletions docker/upstream_db/0003_openledger_image_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,18 @@ CREATE TABLE public.image (
ingestion_type character varying(80),
provider character varying(80),
source character varying(80),
foreign_identifier character varying(3000),
foreign_landing_url character varying(1000),
url character varying(3000) NOT NULL,
thumbnail character varying(3000),
foreign_identifier text,
foreign_landing_url text,
url text NOT NULL,
thumbnail text,
width integer,
height integer,
filesize integer,
license character varying(50) NOT NULL,
license_version character varying(25),
creator character varying(2000),
creator_url character varying(2000),
title character varying(5000),
creator text,
creator_url text,
title text,
meta_data jsonb,
tags jsonb,
watermarked boolean,
Expand Down
16 changes: 8 additions & 8 deletions docker/upstream_db/0006_openledger_audio_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ CREATE TABLE public.audio (
ingestion_type character varying(80),
provider character varying(80),
source character varying(80),
foreign_identifier character varying(3000),
foreign_landing_url character varying(1000),
url character varying(3000) NOT NULL,
thumbnail character varying(3000),
foreign_identifier text,
foreign_landing_url text,
url text NOT NULL,
thumbnail text,
filetype character varying(5),
duration integer,
bit_rate integer,
Expand All @@ -37,16 +37,16 @@ CREATE TABLE public.audio (
filesize integer,
license character varying(50) NOT NULL,
license_version character varying(25),
creator character varying(2000),
creator_url character varying(2000),
title character varying(5000),
creator text,
creator_url text,
title text,
meta_data jsonb,
tags jsonb,
watermarked boolean,
last_synced_with_source timestamp with time zone,
removed_from_source boolean NOT NULL,
standardized_popularity double precision,
audio_set_foreign_identifier character varying(1000)
audio_set_foreign_identifier text
);


Expand Down
18 changes: 9 additions & 9 deletions docker/upstream_db/0007_openledger_audio_view.sql
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@ CREATE VIEW audioset_view AS
-- below). More info here:
-- https://github.com/WordPress/openverse-catalog/issues/658
SELECT DISTINCT ON (audio.audio_set_foreign_identifier, audio.provider)
(audio.audio_set_foreign_identifier::text) ::character varying(1000) AS foreign_identifier,
(audio.audio_set ->> 'title'::text) ::character varying(2000) AS title,
(audio.audio_set ->> 'foreign_landing_url'::text) ::character varying(1000) AS foreign_landing_url,
(audio.audio_set ->> 'creator'::text) ::character varying(2000) AS creator,
(audio.audio_set ->> 'creator_url'::text) ::character varying(2000) AS creator_url,
(audio.audio_set ->> 'url'::text) ::character varying(1000) AS url,
(audio.audio_set ->> 'filesize'::text) ::integer AS filesize,
(audio.audio_set ->> 'filetype'::text) ::character varying(80) AS filetype,
(audio.audio_set ->> 'thumbnail'::text) ::character varying(1000) AS thumbnail,
audio.audio_set_foreign_identifier ::text AS foreign_identifier,
audio.audio_set ->> 'title' ::text AS title,
audio.audio_set ->> 'foreign_landing_url' ::text AS foreign_landing_url,
audio.audio_set ->> 'creator' ::text AS creator,
audio.audio_set ->> 'creator_url' ::text AS creator_url,
audio.audio_set ->> 'url' ::text AS url,
(audio.audio_set ->> 'filesize'::text) ::integer AS filesize,
(audio.audio_set ->> 'filetype'::text) ::character varying(80) AS filetype,
audio.audio_set ->> 'thumbnail' ::text AS thumbnail,
audio.provider
FROM audio
WHERE (audio.audio_set_foreign_identifier IS NOT NULL AND audio.audio_set IS NOT NULL)
Expand Down
30 changes: 15 additions & 15 deletions documentation/meta/media_properties/catalog.md

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions ingestion_server/test/mock_schemas/audio.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ CREATE TABLE public.audio (
created_on timestamp with time zone NOT NULL,
updated_on timestamp with time zone NOT NULL,
identifier uuid NOT NULL,
foreign_identifier character varying(1000),
title character varying(2000),
foreign_landing_url character varying(1000),
creator character varying(2000),
creator_url character varying(2000),
url character varying(1000),
foreign_identifier text,
title text,
foreign_landing_url text,
creator text,
creator_url text,
url text,
filesize integer,
watermarked boolean,
license character varying(50) NOT NULL,
Expand All @@ -53,9 +53,9 @@ CREATE TABLE public.audio (
bit_rate integer,
sample_rate integer,
alt_files jsonb,
thumbnail character varying(1000),
filetype character varying(80),
audio_set_foreign_identifier character varying(1000)
thumbnail text,
filetype text,
audio_set_foreign_identifier text
);


Expand Down
14 changes: 7 additions & 7 deletions ingestion_server/test/mock_schemas/audioset.sql
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ CREATE TABLE public.audioset (
id integer NOT NULL,
created_on timestamp with time zone NOT NULL,
updated_on timestamp with time zone NOT NULL,
foreign_identifier character varying(1000),
title character varying(2000),
foreign_landing_url character varying(1000),
creator character varying(2000),
creator_url character varying(2000),
url character varying(1000),
foreign_identifier text,
title text,
foreign_landing_url text,
creator text,
creator_url text,
url text,
filesize integer,
filetype character varying(80),
thumbnail character varying(1000),
thumbnail text,
provider character varying(80)
);

Expand Down
14 changes: 7 additions & 7 deletions ingestion_server/test/mock_schemas/audioset_view.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ SET row_security = off;
--

CREATE VIEW public.audioset_view AS
SELECT DISTINCT ((audio.audio_set ->> 'foreign_identifier'::text))::character varying(1000) AS foreign_identifier,
((audio.audio_set ->> 'title'::text))::character varying(2000) AS title,
((audio.audio_set ->> 'foreign_landing_url'::text))::character varying(1000) AS foreign_landing_url,
((audio.audio_set ->> 'creator'::text))::character varying(2000) AS creator,
((audio.audio_set ->> 'creator_url'::text))::character varying(2000) AS creator_url,
((audio.audio_set ->> 'url'::text))::character varying(1000) AS url,
SELECT DISTINCT (audio.audio_set ->> 'foreign_identifier'::text) AS foreign_identifier,
(audio.audio_set ->> 'title'::text) AS title,
(audio.audio_set ->> 'foreign_landing_url'::text) AS foreign_landing_url,
(audio.audio_set ->> 'creator'::text) AS creator,
(audio.audio_set ->> 'creator_url'::text) AS creator_url,
(audio.audio_set ->> 'url'::text) AS url,
((audio.audio_set ->> 'filesize'::text))::integer AS filesize,
((audio.audio_set ->> 'filetype'::text))::character varying(80) AS filetype,
((audio.audio_set ->> 'thumbnail'::text))::character varying(1000) AS thumbnail,
(audio.audio_set ->> 'thumbnail'::text) AS thumbnail,
audio.provider
FROM public.audio
WHERE (audio.audio_set IS NOT NULL);
Expand Down
14 changes: 7 additions & 7 deletions ingestion_server/test/mock_schemas/image.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,18 @@ CREATE TABLE public.image (
identifier uuid NOT NULL,
provider character varying(80),
source character varying(80),
foreign_identifier character varying(1000),
foreign_landing_url character varying(1000),
url character varying(1000),
thumbnail character varying(1000),
foreign_identifier text,
foreign_landing_url text,
url text,
thumbnail text,
width integer,
height integer,
filesize integer,
license character varying(50) NOT NULL,
license_version character varying(25),
creator character varying(2000),
creator_url character varying(2000),
title character varying(5000),
creator text,
creator_url text,
title text,
last_synced_with_source timestamp with time zone,
removed_from_source boolean NOT NULL,
meta_data jsonb,
Expand Down

0 comments on commit 4798866

Please sign in to comment.