diff --git a/src/cc_catalog_airflow/dags/common/storage/audio.py b/src/cc_catalog_airflow/dags/common/storage/audio.py index 0f2e603f3..3d5754484 100644 --- a/src/cc_catalog_airflow/dags/common/storage/audio.py +++ b/src/cc_catalog_airflow/dags/common/storage/audio.py @@ -47,6 +47,18 @@ columns.JSONColumn( name='tags', required=False ), + columns.BooleanColumn( + name='watermarked', required=False, + ), + columns.StringColumn( + name='provider', required=False, size=80, truncate=False + ), + columns.StringColumn( + name='source', required=False, size=80, truncate=False + ), + columns.StringColumn( + name="ingestion_type", required=False, size=80, truncate=False + ), columns.IntegerColumn( name='duration', required=False ), @@ -70,15 +82,6 @@ # Alternative files: url, filesize, bit_rate, sample_rate name='alt_audio_files', required=False ), - columns.StringColumn( - name='provider', required=False, size=80, truncate=False - ), - columns.StringColumn( - name='source', required=False, size=80, truncate=False - ), - columns.StringColumn( - name="ingestion_type", required=False, size=80, truncate=False - ), ] Audio = namedtuple("Audio", [c.NAME for c in AUDIO_TSV_COLUMNS]) @@ -126,6 +129,7 @@ def add_item( title: Optional[str] = None, meta_data: Optional[Union[Dict, str]] = None, raw_tags: Optional[Union[list, str]] = None, + watermarked: Optional[bool] = False, duration: Optional[int] = None, bit_rate: Optional[int] = None, sample_rate: Optional[int] = None, @@ -137,7 +141,7 @@ def add_item( set_url: Optional[str] = None, alt_audio_files: Optional[Dict] = None, source: Optional[str] = None, - ingestion_type: Optional[str] = 'commoncrawl', + ingestion_type: Optional[str] = None, ): """ Add information for a single audio to the AudioStore. 
@@ -208,6 +212,7 @@ def add_item( 'title': title, 'meta_data': meta_data, 'raw_tags': raw_tags, + 'watermarked': watermarked, 'duration': duration, 'bit_rate': bit_rate, 'sample_rate': sample_rate, diff --git a/src/cc_catalog_airflow/dags/common/storage/image.py b/src/cc_catalog_airflow/dags/common/storage/image.py index 066fb5e1d..8f5e388b8 100644 --- a/src/cc_catalog_airflow/dags/common/storage/image.py +++ b/src/cc_catalog_airflow/dags/common/storage/image.py @@ -116,7 +116,7 @@ def add_item( raw_tags=None, watermarked: Optional[str] = "f", source: Optional[str] = None, - ingestion_type: Optional[str] = 'commoncrawl', + ingestion_type: Optional[str] = None, ): """ Add information for a single image to the ImageStore. diff --git a/src/cc_catalog_airflow/dags/common/storage/media.py b/src/cc_catalog_airflow/dags/common/storage/media.py index 020ff5d4c..c9e9855bc 100644 --- a/src/cc_catalog_airflow/dags/common/storage/media.py +++ b/src/cc_catalog_airflow/dags/common/storage/media.py @@ -30,6 +30,9 @@ "pdm", } +COMMON_CRAWL = 'commoncrawl' +PROVIDER_API = 'provider_api' + class MediaStore(metaclass=abc.ABCMeta): """ @@ -137,8 +140,8 @@ def clean_media_metadata(self, **media_data) -> Optional[dict]: and for common metadata we: - remove `license_url` and `raw_license_url`, - validate `license_` and `license_version`, - - enrich `metadata` and `tags`, - - remove `raw_tags` are removed, + - enrich `metadata`, + - replace `raw_tags` with enriched `tags`, - validate `source`, - add `provider`, - add `filesize` (with value of None) @@ -153,6 +156,14 @@ def clean_media_metadata(self, **media_data) -> Optional[dict]: media_data.get('source'), self._PROVIDER ) + # Add ingestion_type column value based on `source`. 
+    # (same rule as in `util.loader.ingestion_column`) + if media_data.get('ingestion_type') is None: + if media_data['source'] == COMMON_CRAWL: + media_data['ingestion_type'] = COMMON_CRAWL + else: + media_data['ingestion_type'] = PROVIDER_API + media_data['tags'] = self._enrich_tags( media_data.pop('raw_tags', None) ) diff --git a/src/cc_catalog_airflow/dags/common/storage/test_audio.py b/src/cc_catalog_airflow/dags/common/storage/test_audio.py index a39668ca9..9cbfdb965 100644 --- a/src/cc_catalog_airflow/dags/common/storage/test_audio.py +++ b/src/cc_catalog_airflow/dags/common/storage/test_audio.py @@ -33,6 +33,7 @@ 'creator_url': 'https://creatorurl.com', 'title': 'agreatpicture', 'meta_data': {}, + 'watermarked': None, 'raw_tags': {}, 'bit_rate': None, 'sample_rate': None, @@ -221,6 +222,7 @@ def default_audio_args( title='agreatsong', meta_data={"description": "cat song"}, tags={"name": "tag1", "provider": "testing"}, + watermarked=None, duration=100, bit_rate=None, sample_rate=None, @@ -262,6 +264,10 @@ def test_create_tsv_row_creates_alt_audio_files( 'agreatsong', '{"description": "cat song"}', '{"name": "tag1", "provider": "testing"}', + '\\N', + 'testing_provider', + 'testing_source', + 'provider_api', '100', '\\N', '\\N', @@ -271,9 +277,6 @@ '[{"url": ' '"http://alternative.com/audio.mp3", "filesize": "123", "bit_rate": "41000", ' '"sample_rate": "16000"}]', - 'testing_provider', - 'testing_source', - 'provider_api', ]) + '\n' assert actual_row == expected_row @@ -308,6 +311,10 @@ def test_create_tsv_row_creates_audio_set( 'agreatsong', '{"description": "cat song"}', '{"name": "tag1", "provider": "testing"}', + '\\N', + 'testing_provider', + 'testing_source', + 'provider_api', '100', '\\N', '\\N', @@ -316,10 +323,6 @@ '{"audio_set": "test_audio_set", "set_url": "test.com", ' '"set_position": "1", "set_thumbnail": "thumbnail.jpg"}', '\\N', - 
'testing_provider', - 'testing_source', - 'provider_api', - ]) + '\n' assert actual_row == expected_row @@ -438,6 +441,7 @@ def mock_validate_url(url_string): 'title': 'agreatsong', 'meta_data': {'description': 'a song about cat'}, 'tags': [{'name': 'tag1', 'provider': 'testing'}], + 'watermarked': None, 'bit_rate': 16000, 'sample_rate': 44100, 'category': 'music', @@ -472,6 +476,10 @@ def mock_validate_url(url_string): 'agreatsong', '{"description": "a song about cat"}', '[{"name": "tag1", "provider": "testing"}]', + '\\N', + 'testing_provider', + 'testing_source', + 'provider_api', '200', '16000', '44100', @@ -480,8 +488,5 @@ def mock_validate_url(url_string): '{"audio_set": "album", "set_position": "1", "set_url": "https://album.com/", ' '"set_thumbnail": "https://album.com/thumbnail.jpg"}', '\\N', - 'testing_provider', - 'testing_source', - 'provider_api' ]) + '\n' assert expect_row == actual_row diff --git a/src/cc_catalog_airflow/dags/util/loader/column_names.py b/src/cc_catalog_airflow/dags/util/loader/column_names.py index 5ff4f00dc..fb9b49b9b 100644 --- a/src/cc_catalog_airflow/dags/util/loader/column_names.py +++ b/src/cc_catalog_airflow/dags/util/loader/column_names.py @@ -25,3 +25,10 @@ UPDATED_ON = 'updated_on' LAST_SYNCED = 'last_synced_with_source' REMOVED = 'removed_from_source' +DURATION = 'duration' +BIT_RATE = 'bit_rate' +SAMPLE_RATE = 'sample_rate' +CATEGORY = 'category' +GENRE = 'genre' +AUDIO_SET = 'audio_set' +ALT_AUDIO_FILES = 'alt_audio_files' diff --git a/src/cc_catalog_airflow/dags/util/loader/ingestion_column.py b/src/cc_catalog_airflow/dags/util/loader/ingestion_column.py index 863b0629b..aa2908522 100644 --- a/src/cc_catalog_airflow/dags/util/loader/ingestion_column.py +++ b/src/cc_catalog_airflow/dags/util/loader/ingestion_column.py @@ -6,7 +6,9 @@ import logging import os +from common.storage.audio import AUDIO_TSV_COLUMNS from common.storage.image import IMAGE_TSV_COLUMNS +from common.storage import media logger = 
logging.getLogger(__name__) @@ -25,12 +27,11 @@ def check_and_fix_tsv_file(tsv_file_name): # If no media file is set in the filename, it is # probably image media_type = 'image' - old_cols_number = len(IMAGE_TSV_COLUMNS) - 1 if media_type == 'audio': - # TODO: when audio is added: - # old_cols_number = len(AUDIO_TSV_COLUMNS) - 1 - old_cols_number = 0 - new_cols_number = old_cols_number + 1 + new_cols_number = len(AUDIO_TSV_COLUMNS) + else: + new_cols_number = len(IMAGE_TSV_COLUMNS) + old_cols_number = new_cols_number - 1 with open(tsv_file_name) as f: test_line = f.readline() line_list = [word.strip() for word in test_line.split('\t')] @@ -51,8 +52,8 @@ def check_and_fix_tsv_file(tsv_file_name): def _add_ingestion_type(tsv_file_name, source): - COMMON_CRAWL = 'commoncrawl' - PROVIDER_API = 'provider_api' + COMMON_CRAWL = media.COMMON_CRAWL + PROVIDER_API = media.PROVIDER_API ingestion_type = source if source == COMMON_CRAWL else PROVIDER_API logger.debug(f'Found source: {source}') logger.info( diff --git a/src/cc_catalog_airflow/dags/util/loader/loader.py b/src/cc_catalog_airflow/dags/util/loader/loader.py index 6f8a5e4dc..e062b7606 100644 --- a/src/cc_catalog_airflow/dags/util/loader/loader.py +++ b/src/cc_catalog_airflow/dags/util/loader/loader.py @@ -23,8 +23,12 @@ def load_local_data(output_dir, postgres_conn_id, identifier, overwrite=False): def copy_to_s3(output_dir, bucket, identifier, aws_conn_id): tsv_file_name = paths.get_staged_file(output_dir, identifier) + media_type = _extract_media_type(tsv_file_name) ingestion_column.check_and_fix_tsv_file(tsv_file_name) - s3.copy_file_to_s3_staging(identifier, tsv_file_name, bucket, aws_conn_id) + s3.copy_file_to_s3_staging( + identifier, tsv_file_name, bucket, aws_conn_id, + media_prefix=media_type + ) def load_s3_data( @@ -41,7 +45,7 @@ def load_s3_data( if media_type is None: media_type = 'image' tsv_key = s3.get_staged_s3_object( - identifier, bucket, aws_conn_id + identifier, bucket, aws_conn_id, 
media_prefix=media_type ) sql.load_s3_data_to_intermediate_table( postgres_conn_id, diff --git a/src/cc_catalog_airflow/dags/util/loader/sql.py b/src/cc_catalog_airflow/dags/util/loader/sql.py index 5a594b618..ecce875be 100644 --- a/src/cc_catalog_airflow/dags/util/loader/sql.py +++ b/src/cc_catalog_airflow/dags/util/loader/sql.py @@ -12,6 +12,7 @@ LOAD_TABLE_NAME_STUB = 'provider_data_' IMAGE_TABLE_NAME = 'image' +AUDIO_TABLE_NAME = 'audio' DB_USER_NAME = 'deploy' NOW = 'NOW()' FALSE = "'f'" @@ -45,8 +46,38 @@ media_type = 'image' load_table = _get_load_table_name(identifier, media_type=media_type) postgres = PostgresHook(postgres_conn_id=postgres_conn_id) - postgres.run( - dedent( + if media_type == 'audio': + table_creation_query = dedent( + f''' + CREATE TABLE public.{load_table} ( + {col.FOREIGN_ID} character varying(3000), + {col.LANDING_URL} character varying(1000), + {col.DIRECT_URL} character varying(3000), + {col.THUMBNAIL} character varying(3000), + {col.FILESIZE} integer, + {col.LICENSE} character varying(50), + {col.LICENSE_VERSION} character varying(25), + {col.CREATOR} character varying(2000), + {col.CREATOR_URL} character varying(2000), + {col.TITLE} character varying(5000), + {col.META_DATA} jsonb, + {col.TAGS} jsonb, + {col.WATERMARKED} boolean, + {col.PROVIDER} character varying(80), + {col.SOURCE} character varying(80), + {col.INGESTION_TYPE} character varying(80), + {col.DURATION} integer, + {col.BIT_RATE} integer, + {col.SAMPLE_RATE} integer, + {col.CATEGORY} character varying(200), + {col.GENRE} jsonb, + {col.AUDIO_SET} jsonb, + {col.ALT_AUDIO_FILES} jsonb + ); + ''' + ) + else: + table_creation_query = dedent( f''' CREATE TABLE public.{load_table} ( {col.FOREIGN_ID} character varying(3000), @@ -70,7 +101,7 @@ ); ''' ) - ) + postgres.run(table_creation_query) postgres.run( f'ALTER TABLE public.{load_table} OWNER TO {DB_USER_NAME};' ) @@ -203,7 +234,7 @@ def _clean_intermediate_table_data( def upsert_records_to_db_table( 
postgres_conn_id, identifier, - db_table=IMAGE_TABLE_NAME, + db_table=None, media_type='image', ): def _newest_non_null(column: str) -> str: @@ -230,34 +261,66 @@ def _merge_jsonb_arrays(column: str) -> str: EXCLUDED.{column}, old.{column} )''' + if db_table is None: + db_table = AUDIO_TABLE_NAME \ + if media_type == 'audio' else IMAGE_TABLE_NAME load_table = _get_load_table_name(identifier, media_type=media_type) logger.info(f'Upserting new records into {db_table}.') postgres = PostgresHook(postgres_conn_id=postgres_conn_id) column_inserts = { - col.CREATED_ON: NOW, - col.UPDATED_ON: NOW, - col.INGESTION_TYPE: col.INGESTION_TYPE, - col.PROVIDER: col.PROVIDER, - col.SOURCE: col.SOURCE, - col.FOREIGN_ID: col.FOREIGN_ID, - col.LANDING_URL: col.LANDING_URL, - col.DIRECT_URL: col.DIRECT_URL, - col.THUMBNAIL: col.THUMBNAIL, - col.WIDTH: col.WIDTH, - col.HEIGHT: col.HEIGHT, - col.FILESIZE: col.FILESIZE, - col.LICENSE: col.LICENSE, - col.LICENSE_VERSION: col.LICENSE_VERSION, - col.CREATOR: col.CREATOR, - col.CREATOR_URL: col.CREATOR_URL, - col.TITLE: col.TITLE, - col.LAST_SYNCED: NOW, - col.REMOVED: FALSE, - col.META_DATA: col.META_DATA, - col.TAGS: col.TAGS, - col.WATERMARKED: col.WATERMARKED + col.CREATED_ON: NOW, + col.UPDATED_ON: NOW, + col.INGESTION_TYPE: col.INGESTION_TYPE, + col.PROVIDER: col.PROVIDER, + col.SOURCE: col.SOURCE, + col.FOREIGN_ID: col.FOREIGN_ID, + col.LANDING_URL: col.LANDING_URL, + col.DIRECT_URL: col.DIRECT_URL, + col.THUMBNAIL: col.THUMBNAIL, + col.FILESIZE: col.FILESIZE, + col.LICENSE: col.LICENSE, + col.LICENSE_VERSION: col.LICENSE_VERSION, + col.CREATOR: col.CREATOR, + col.CREATOR_URL: col.CREATOR_URL, + col.TITLE: col.TITLE, + col.LAST_SYNCED: NOW, + col.REMOVED: FALSE, + col.META_DATA: col.META_DATA, + col.TAGS: col.TAGS, + col.WATERMARKED: col.WATERMARKED, } + if media_type == 'audio': + column_inserts.update({ + col.DURATION: col.DURATION, + col.BIT_RATE: col.BIT_RATE, + col.SAMPLE_RATE: col.SAMPLE_RATE, + col.CATEGORY: col.CATEGORY, + 
col.GENRE: col.GENRE, + col.AUDIO_SET: col.AUDIO_SET, + col.ALT_AUDIO_FILES: col.ALT_AUDIO_FILES, + }) + else: + column_inserts.update({ + col.WIDTH: col.WIDTH, + col.HEIGHT: col.HEIGHT, + }) + if media_type == 'audio': + media_specific_upsert_query = ( + f'''{_newest_non_null(col.DURATION)}, + {_newest_non_null(col.BIT_RATE)}, + {_newest_non_null(col.SAMPLE_RATE)}, + {_newest_non_null(col.CATEGORY)}, + {_merge_jsonb_arrays(col.GENRE)}, + {_merge_jsonb_objects(col.AUDIO_SET)}, + {_merge_jsonb_objects(col.ALT_AUDIO_FILES)} + ''' + ) + else: + media_specific_upsert_query = ( + f'''{_newest_non_null(col.WIDTH)}, + {_newest_non_null(col.HEIGHT)}''' + ) upsert_query = dedent( f''' INSERT INTO {db_table} AS old ({', '.join(column_inserts.keys())}) @@ -273,17 +336,16 @@ {_newest_non_null(col.LANDING_URL)}, {_newest_non_null(col.DIRECT_URL)}, {_newest_non_null(col.THUMBNAIL)}, - {_newest_non_null(col.WIDTH)}, - {_newest_non_null(col.HEIGHT)}, {_newest_non_null(col.FILESIZE)}, {_newest_non_null(col.LICENSE)}, {_newest_non_null(col.LICENSE_VERSION)}, {_newest_non_null(col.CREATOR)}, {_newest_non_null(col.CREATOR_URL)}, {_newest_non_null(col.TITLE)}, - {_newest_non_null(col.WATERMARKED)}, {_merge_jsonb_objects(col.META_DATA)}, - {_merge_jsonb_arrays(col.TAGS)} + {_merge_jsonb_arrays(col.TAGS)}, + {_newest_non_null(col.WATERMARKED)}, + {media_specific_upsert_query} ''' ) postgres.run(upsert_query) @@ -292,28 +354,54 @@ def _merge_jsonb_arrays(column: str) -> str: def overwrite_records_in_db_table( postgres_conn_id, identifier, - db_table=IMAGE_TABLE_NAME, + db_table=None, media_type='image' ): + if db_table is None: + db_table = AUDIO_TABLE_NAME \ + if media_type == 'audio' else IMAGE_TABLE_NAME load_table = _get_load_table_name(identifier, media_type=media_type) logger.info(f'Updating records in {db_table}.') postgres = PostgresHook(postgres_conn_id=postgres_conn_id) - columns_to_update = [ - col.LANDING_URL, - col.DIRECT_URL, - 
col.THUMBNAIL, - col.WIDTH, - col.HEIGHT, - col.FILESIZE, - col.LICENSE, - col.LICENSE_VERSION, - col.CREATOR, - col.CREATOR_URL, - col.TITLE, - col.META_DATA, - col.TAGS, - col.WATERMARKED, - ] + if media_type == 'audio': + columns_to_update = [ + col.LANDING_URL, + col.DIRECT_URL, + col.THUMBNAIL, + col.FILESIZE, + col.LICENSE, + col.LICENSE_VERSION, + col.CREATOR, + col.CREATOR_URL, + col.TITLE, + col.META_DATA, + col.TAGS, + col.WATERMARKED, + col.DURATION, + col.BIT_RATE, + col.SAMPLE_RATE, + col.CATEGORY, + col.GENRE, + col.AUDIO_SET, + col.ALT_AUDIO_FILES, + ] + else: + columns_to_update = [ + col.LANDING_URL, + col.DIRECT_URL, + col.THUMBNAIL, + col.WIDTH, + col.HEIGHT, + col.FILESIZE, + col.LICENSE, + col.LICENSE_VERSION, + col.CREATOR, + col.CREATOR_URL, + col.TITLE, + col.META_DATA, + col.TAGS, + col.WATERMARKED, + ] update_set_string = ',\n'.join( [f'{column} = {load_table}.{column}' for column in columns_to_update] ) diff --git a/src/cc_catalog_airflow/local_postgres/0006_openledger_audio_schema.sql b/src/cc_catalog_airflow/local_postgres/0006_openledger_audio_schema.sql new file mode 100644 index 000000000..e4770fc09 --- /dev/null +++ b/src/cc_catalog_airflow/local_postgres/0006_openledger_audio_schema.sql @@ -0,0 +1,52 @@ +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET client_min_messages = warning; +SET row_security = off; +SET default_tablespace = ''; +SET default_with_oids = false; + +CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA public; +COMMENT ON EXTENSION "uuid-ossp" IS 'generate universally unique identifiers (UUIDs)'; + + +CREATE TABLE public.audio ( + identifier uuid PRIMARY KEY DEFAULT public.uuid_generate_v4(), + created_on timestamp with time zone NOT NULL, + updated_on timestamp with time zone NOT NULL, + 
ingestion_type character varying(80), + provider character varying(80), + source character varying(80), + foreign_identifier character varying(3000), + foreign_landing_url character varying(1000), + url character varying(3000) NOT NULL, + thumbnail character varying(3000), + duration integer, + bit_rate integer, + sample_rate integer, + category character varying(200), + genre jsonb, + audio_set jsonb, + alt_audio_files jsonb, + filesize integer, + license character varying(50) NOT NULL, + license_version character varying(25), + creator character varying(2000), + creator_url character varying(2000), + title character varying(5000), + meta_data jsonb, + tags jsonb, + watermarked boolean, + last_synced_with_source timestamp with time zone, + removed_from_source boolean NOT NULL +); + + +ALTER TABLE public.audio OWNER TO deploy; +CREATE UNIQUE INDEX audio_provider_fid_idx + ON public.audio + USING btree (provider, md5(foreign_identifier)); diff --git a/src/cc_catalog_airflow/local_postgres/Dockerfile b/src/cc_catalog_airflow/local_postgres/Dockerfile index ee37f9082..f30a80e2f 100644 --- a/src/cc_catalog_airflow/local_postgres/Dockerfile +++ b/src/cc_catalog_airflow/local_postgres/Dockerfile @@ -7,4 +7,5 @@ ADD 0002_aws_s3_mock.sql /docker-entrypoint-initdb.d ADD 0003_openledger_image_schema.sql /docker-entrypoint-initdb.d ADD 0004_openledger_image_view.sql /docker-entrypoint-initdb.d ADD 0005_openledger_old_image_schema.sql /docker-entrypoint-initdb.d +ADD 0006_openledger_audio_schema.sql /docker-entrypoint-initdb.d RUN apt-get -y update && apt-get -y install python3-boto3 postgresql-plpython3-13 diff --git a/src/openledger_sql/03_create_audio_schema.sql b/src/openledger_sql/03_create_audio_schema.sql new file mode 100644 index 000000000..e4770fc09 --- /dev/null +++ b/src/openledger_sql/03_create_audio_schema.sql @@ -0,0 +1,52 @@ +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings 
= on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET client_min_messages = warning; +SET row_security = off; +SET default_tablespace = ''; +SET default_with_oids = false; + +CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA public; +COMMENT ON EXTENSION "uuid-ossp" IS 'generate universally unique identifiers (UUIDs)'; + + +CREATE TABLE public.audio ( + identifier uuid PRIMARY KEY DEFAULT public.uuid_generate_v4(), + created_on timestamp with time zone NOT NULL, + updated_on timestamp with time zone NOT NULL, + ingestion_type character varying(80), + provider character varying(80), + source character varying(80), + foreign_identifier character varying(3000), + foreign_landing_url character varying(1000), + url character varying(3000) NOT NULL, + thumbnail character varying(3000), + duration integer, + bit_rate integer, + sample_rate integer, + category character varying(200), + genre jsonb, + audio_set jsonb, + alt_audio_files jsonb, + filesize integer, + license character varying(50) NOT NULL, + license_version character varying(25), + creator character varying(2000), + creator_url character varying(2000), + title character varying(5000), + meta_data jsonb, + tags jsonb, + watermarked boolean, + last_synced_with_source timestamp with time zone, + removed_from_source boolean NOT NULL +); + + +ALTER TABLE public.audio OWNER TO deploy; +CREATE UNIQUE INDEX audio_provider_fid_idx + ON public.audio + USING btree (provider, md5(foreign_identifier));