From 4d3a0cf07a22e2eb503bb95dba6ed17b17f34e66 Mon Sep 17 00:00:00 2001 From: Olga Bulat Date: Wed, 17 May 2023 17:11:48 +0300 Subject: [PATCH] Add a script to generate the media_properties.md --- catalog/justfile | 21 +++ catalog/utilities/media_props_gen/__init__.py | 0 .../media_props_gen/column_parser.py | 117 ++++++++++++ .../generate_media_properties.py | 177 ++++++++++++++++++ .../utilities/media_props_gen/media_props.md | 0 .../utilities/media_props_gen/postamble.md | 0 catalog/utilities/media_props_gen/preamble.md | 9 + documentation/meta/media_properties.md | 76 ++++++++ 8 files changed, 400 insertions(+) create mode 100644 catalog/utilities/media_props_gen/__init__.py create mode 100644 catalog/utilities/media_props_gen/column_parser.py create mode 100644 catalog/utilities/media_props_gen/generate_media_properties.py create mode 100644 catalog/utilities/media_props_gen/media_props.md create mode 100644 catalog/utilities/media_props_gen/postamble.md create mode 100644 catalog/utilities/media_props_gen/preamble.md create mode 100644 documentation/meta/media_properties.md diff --git a/catalog/justfile b/catalog/justfile index 9689c226759..13401edb29a 100644 --- a/catalog/justfile +++ b/catalog/justfile @@ -136,6 +136,27 @@ generate-dag-docs fail_on_diff="false": fi fi + +# Generate the media properties documentation +generate-media-props fail_on_diff="true": + #!/bin/bash + set -e + python utilities/media_props_gen/generate_media_properties.py \&\& chmod 666 utilities/media_props_gen/media_properties.md + # Move the file to the documentation folder + mv utilities/media_props_gen/media_properties.md ../documentation/meta/media_properties.md + echo -n "Running linting..." + # Linting step afterwards is necessary since the generated output differs greatly from what prettier expects + just ../lint prettier ../documentation/meta/media_properties.md &>/dev/null || true + echo "Done!"
+ if {{ fail_on_diff }}; then + set +e + git diff --exit-code ../documentation/meta/media_properties.md + if [ $? -ne 0 ]; then + printf "\n\n\e[31m!! Changes found in Media properties documentation, please run 'just generate-media-props' locally and commit difference !!\n\n" + exit 1 + fi + fi + # Generate files for a new provider add-provider provider_name endpoint +media_types="image": python3 templates/create_provider_ingester.py "{{ provider_name }}" "{{ endpoint }}" -m {{ media_types }} diff --git a/catalog/utilities/media_props_gen/__init__.py b/catalog/utilities/media_props_gen/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/catalog/utilities/media_props_gen/column_parser.py b/catalog/utilities/media_props_gen/column_parser.py new file mode 100644 index 00000000000..f9b6673c2d2 --- /dev/null +++ b/catalog/utilities/media_props_gen/column_parser.py @@ -0,0 +1,117 @@ +import ast +from pathlib import Path + + +STORAGE_PATH = Path(__file__).parents[2] / "dags" / "common" / "storage" +COLUMNS_PATH = STORAGE_PATH / "columns.py" + +COLUMNS_URL = "https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py" # noqa: E501 + + +def format_python_column( + column_db_name: str, + python_column: dict[str, any], + python_column_lines: dict[str, tuple[int, int]], +) -> str: + col_type = python_column.pop("python_type") + start, end = python_column_lines[col_type] + python_column_string = f"[{col_type}]({COLUMNS_URL}#L{start}-L{end})(" + col_name = python_column.pop("name") + if col_name != column_db_name: + python_column_string += f"name='{col_name}', " + custom_props = python_column.pop("custom_column_props", None) + custom_props_string = "" + if custom_props: + props_string = ", ".join([f"{k}={v}" for k, v in custom_props.items()]) + custom_props_string = f", {col_type}Props({props_string})" + python_column_string += ", ".join([f"{k}={v}" for k, v in python_column.items()]) + python_column_string += 
f"{custom_props_string})" + + return python_column_string + + +def parse_python_columns() -> dict[str, any]: + """Get the Python column definitions from the columns.py file.""" + columns = {} + python_column_lines = get_python_column_types() + + with open(COLUMNS_PATH) as f: + contents = f.read() + code = ast.parse(contents) + + for item in ast.iter_child_nodes(code): + if isinstance(item, ast.Assign): + column = parse_column_definition(item) + if not column: + continue + db_name = column["db_name"] + del column["db_name"] + columns[db_name] = format_python_column( + db_name, column, python_column_lines + ) + + return columns + + +def get_python_column_types() -> dict[str, tuple[int, int]]: + """ + Parse the columns.py file to get the Python column names + and their line numbers for hyperlinks. + Sample output: `StringColumn: (3, 5)`` + """ + with open(COLUMNS_PATH) as f: + file_contents = f.read() + code = ast.parse(file_contents) + return { + item.name: (item.lineno, item.end_lineno) + for item in ast.iter_child_nodes(code) + if isinstance(item, ast.ClassDef) and item.name.endswith("Column") + } + + +def parse_column_definition(item: ast.Assign) -> dict[str, any] | None: + column = { + "python_type": None, + "name": None, + "db_name": None, + "nullable": None, + "required": False, + "upsert_strategy": "newest_non_null", + "custom_column_props": {}, + } + if hasattr(item.value, "func") and hasattr(item.value.func, "id"): + column["python_type"] = item.value.func.id + + if hasattr(item.value, "keywords"): + for kw in item.value.keywords: + if hasattr(kw.value, "value"): + if kw.arg not in column.keys(): + column["custom_column_props"][kw.arg] = kw.value.value + else: + # upsert_strategy is a special case + if hasattr(kw.value, "attr"): + column[kw.arg] = kw.value.attr + else: + column[kw.arg] = kw.value.value + else: + if not hasattr(kw.value, "keywords"): + continue + # An Array column that has a base_column + column_params = ", ".join( + 
[f"{kw2.arg}={kw2.value.value}" for kw2 in kw.value.keywords] + ) + column["custom_column_props"][ + kw.arg + ] = f"{kw.value.func.id}({column_params})" + if column["db_name"] is None: + column["db_name"] = column["name"] + if column["name"] is None: + return None + if column["custom_column_props"] == {}: + del column["custom_column_props"] + if column["nullable"] is None: + column["nullable"] = ( + not column["required"] if column["required"] is not None else True + ) + return column + return None diff --git a/catalog/utilities/media_props_gen/generate_media_properties.py b/catalog/utilities/media_props_gen/generate_media_properties.py new file mode 100644 index 00000000000..7573b993862 --- /dev/null +++ b/catalog/utilities/media_props_gen/generate_media_properties.py @@ -0,0 +1,177 @@ +"""Automatic media properties generation.""" +import logging +import re +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +from column_parser import parse_python_columns + + +log = logging.getLogger(__name__) +# Silence noisy modules +logging.getLogger("common.storage.media").setLevel(logging.WARNING) + +# Constants +DOC_MD_PATH = Path(__file__).parent / "media_properties.md" +LOCAL_POSTGRES_FOLDER = Path(__file__).parents[3] / "docker" / "upstream_db" + +SQL_PATH = { + "image": LOCAL_POSTGRES_FOLDER / "0003_openledger_image_schema.sql", + "audio": LOCAL_POSTGRES_FOLDER / "0006_openledger_audio_schema.sql", +} +sql_types = [ + "integer", + "boolean", + "uuid", + "double precision", + "jsonb", + "timestamp with time zone", + "character varying", +] +sql_type_regex = re.compile(f"({'|'.join(sql_types)})") + + +@dataclass +class FieldInfo: + name: str + nullable: bool + datatype: str + constraint: str + python_column: str = "" + + +@dataclass +class FieldSqlInfo: + nullable: bool + datatype: str + constraint: str + + +def create_db_props_dict( + media_type: Literal["image", "audio"] +) -> dict[str, FieldSqlInfo]: + """ + Parse the DDL for a 
media type and returns a list of field + sql definitions. + """ + + create_table_regex = re.compile(r"CREATE\s+TABLE\s+\w+\.(\w+)\s+\(([\s\S]*?)\);") + sql_path = SQL_PATH[media_type] + + with open(sql_path) as f: + contents = f.read() + table_description_matches = create_table_regex.search(contents) + if not table_description_matches: + print(f"Could not find table description for {media_type} in {sql_path}") + return {} + table_name = table_description_matches.group(1) + if table_name != media_type: + print(f"Table name {table_name} does not match media type {media_type}") + return {} + field_descriptions = [ + field.strip() + for field in table_description_matches.group(2).split("\n") + if field.strip() + ] + fields = {} + for field in field_descriptions: + field_name = field.split(" ")[0] + False if "not null" in field.lower() else True + field_constraint = "" + try: + field_type = sql_type_regex.search(field).group(1) + if field_type == "character varying": + char_limit = field.split("(")[1].split(")")[0] + field_constraint = f"({char_limit})" + + if "[]" in field: + field_type = f"array of {field_type}" + except AttributeError: + raise ValueError(f"Could not find type for field {field_name} in {field}") + + fields[field_name] = { + "sql": FieldSqlInfo( + nullable="NOT NULL" not in field, + datatype=field_type, + constraint=field_constraint, + ) + } + return fields + + +def add_column_props(media_props, python_columns): + """Add the python column properties to the media properties dictionary.""" + for prop in media_props.keys(): + if not (python_prop := python_columns.get(prop)): + print(f"Column {prop} not found in table") + python_prop = "" + media_props[prop]["python_column"] = python_prop + return media_props + + +def generate_media_props() -> dict: + """ + Generate a dictionary with the media properties from the database, + python code and markdown documentation files. 
+ """ + media_props = {} + python_columns = parse_python_columns() + for media_type in ["image", "audio"]: + media_props[media_type] = create_db_props_dict(media_type) + media_props[media_type] = add_column_props( + media_props[media_type], python_columns + ) + return media_props + + +def generate_media_props_table(media_properties) -> str: + """Generate the table with media properties.""" + + # Convert the list of FieldInfo objects to a md table + table = "| DB Field | DB Nullable | DB Type | Python Column | Description | \n" + table += "| --- | --- | --- | --- | --- | \n" + media_docs = {} + for field_name, field in media_properties.items(): + field_sql = field["sql"] + field_db_type = ( + field_sql.datatype + if not field_sql.constraint + else f"{field_sql.datatype} {field_sql.constraint}" + ) + table += ( + f"| {field_name} | {field_sql.nullable} | " + f"{field_db_type} | {field.get('python_column', '')} | " + f"{media_docs.get(field_name) or ''}\n" + ) + + return table + + +def generate_markdown_doc(media_properties: dict[str, dict]) -> str: + """ + Generate the tables with media properties database column and + Python objects characteristics. 
+ """ + with open(Path(__file__).parent / "preamble.md") as f: + preamble = f.read() + media_props_doc = f"""{preamble} +## Image Properties\n +{generate_media_props_table(media_properties["image"])} +""" # noqa 501 + media_props_doc += f"""## Audio Properties\n +{generate_media_props_table(media_properties["audio"])} +""" + return media_props_doc + + +def write_media_props_doc(path: Path = DOC_MD_PATH) -> None: + """Generate the DAG documentation and write it to a file.""" + media_properties = generate_media_props() + doc_text = generate_markdown_doc(media_properties) + log.info(f"Writing DAG doc to {path}") + path.write_text(doc_text) + + +if __name__ == "__main__": + write_media_props_doc() diff --git a/catalog/utilities/media_props_gen/media_props.md b/catalog/utilities/media_props_gen/media_props.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/catalog/utilities/media_props_gen/postamble.md b/catalog/utilities/media_props_gen/postamble.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/catalog/utilities/media_props_gen/preamble.md b/catalog/utilities/media_props_gen/preamble.md new file mode 100644 index 00000000000..01cfc481a15 --- /dev/null +++ b/catalog/utilities/media_props_gen/preamble.md @@ -0,0 +1,9 @@ +# Media Properties + +_This document is auto-generated from the source code in +utilities/media_props_gen/generate_media_properties.py._ + +This is a list of the media properties, with the descriptions of corresponding +database columns and Python objects that are used to store and retrieve media +data. The order of the properties corresponds to their order in the image_view +materialized view.
diff --git a/documentation/meta/media_properties.md b/documentation/meta/media_properties.md new file mode 100644 index 00000000000..9e230cfdafb --- /dev/null +++ b/documentation/meta/media_properties.md @@ -0,0 +1,76 @@ +# Media Properties + +_This document is auto-generated from the source code in +utilities/media_props_gen/generate_media_properties.py._ + +This is a list of the media properties, with the descriptions of corresponding +database columns and Python objects that are used to store and retrieve media +data. The order of the properties corresponds to their order in the image_view +materialized view. + +## Image Properties + +| DB Field | DB Nullable | DB Type | Python Column | Description | +| ----------------------- | ----------- | ------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | +| identifier | True | uuid | [UUIDColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L500-L517)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| created_on | False | timestamp with time zone | [TimestampColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L520-L547)(nullable=False, required=True, upsert_strategy=no_change) | +| updated_on | False | timestamp with time zone | [TimestampColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L520-L547)(nullable=False, required=True, upsert_strategy=newest_non_null) | +| ingestion_type | True | character varying (80) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(size=80, truncate=False)) | +| provider |
True | character varying (80) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(size=80, truncate=False)) | +| source | True | character varying (80) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(size=80, truncate=False)) | +| foreign_identifier | True | character varying (3000) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=False, required=True, upsert_strategy=newest_non_null, StringColumnProps(size=3000, truncate=False)) | +| foreign_landing_url | True | character varying (1000) | [URLColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L550-L596)(nullable=True, required=True, upsert_strategy=newest_non_null, URLColumnProps(size=1000)) | +| url | False | character varying (3000) | [URLColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L550-L596)(nullable=False, required=True, upsert_strategy=newest_non_null, URLColumnProps(size=3000)) | +| thumbnail | True | character varying (3000) | [URLColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L550-L596)(name='thumbnail_url', nullable=True, required=False, upsert_strategy=newest_non_null, URLColumnProps(size=3000)) | +| width | True | integer | [IntegerColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L216-L256)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| height | True | integer | [IntegerColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L216-L256)(nullable=True, required=False, 
upsert_strategy=newest_non_null) | +| filesize | True | integer | [IntegerColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L216-L256)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| license | False | character varying (50) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(name='license\_', nullable=False, required=True, upsert_strategy=newest_non_null, StringColumnProps(size=50, truncate=False)) | +| license_version | True | character varying (25) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=False, required=True, upsert_strategy=newest_non_null, StringColumnProps(size=25, truncate=False)) | +| creator | True | character varying (2000) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(size=2000, truncate=True)) | +| creator_url | True | character varying (2000) | [URLColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L550-L596)(nullable=True, required=False, upsert_strategy=newest_non_null, URLColumnProps(size=2000)) | +| title | True | character varying (5000) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(size=5000, truncate=True)) | +| meta_data | True | jsonb | [JSONColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L388-L454)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| tags | True | jsonb | [JSONColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L388-L454)(nullable=True, required=False, 
upsert_strategy=merge_jsonb_arrays) | +| watermarked | True | boolean | [BooleanColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L340-L385)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| last_synced_with_source | True | timestamp with time zone | [TimestampColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L520-L547)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| removed_from_source | False | boolean | [BooleanColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L340-L385)(nullable=False, required=True, upsert_strategy=false) | +| filetype | True | character varying (5) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(truncate=False, size=5)) | +| category | True | character varying (80) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(size=80, truncate=False)) | +| standardized_popularity | True | double precision | [CalculatedColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L259-L337)(nullable=True, required=False, upsert_strategy=newest_non_null) | + +## Audio Properties + +| DB Field | DB Nullable | DB Type | Python Column | Description | +| ----------------------- | ----------- | ------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | +| identifier | True | uuid | 
[UUIDColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L500-L517)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| created_on | False | timestamp with time zone | [TimestampColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L520-L547)(nullable=False, required=True, upsert_strategy=no_change) | +| updated_on | False | timestamp with time zone | [TimestampColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L520-L547)(nullable=False, required=True, upsert_strategy=newest_non_null) | +| ingestion_type | True | character varying (80) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(size=80, truncate=False)) | +| provider | True | character varying (80) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(size=80, truncate=False)) | +| source | True | character varying (80) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(size=80, truncate=False)) | +| foreign_identifier | True | character varying (3000) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=False, required=True, upsert_strategy=newest_non_null, StringColumnProps(size=3000, truncate=False)) | +| foreign_landing_url | True | character varying (1000) | [URLColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L550-L596)(nullable=True, required=True, upsert_strategy=newest_non_null, URLColumnProps(size=1000)) | 
+| url | False | character varying (3000) | [URLColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L550-L596)(nullable=False, required=True, upsert_strategy=newest_non_null, URLColumnProps(size=3000)) | +| thumbnail | True | character varying (3000) | [URLColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L550-L596)(name='thumbnail_url', nullable=True, required=False, upsert_strategy=newest_non_null, URLColumnProps(size=3000)) | +| filetype | True | character varying (5) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(truncate=False, size=5)) | +| duration | True | integer | [IntegerColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L216-L256)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| bit_rate | True | integer | [IntegerColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L216-L256)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| sample_rate | True | integer | [IntegerColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L216-L256)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| category | True | character varying (80) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(size=80, truncate=False)) | +| genres | True | array of character varying (80) | [ArrayColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L599-L651)(nullable=True, required=False, upsert_strategy=newest_non_null, ArrayColumnProps(base_column=StringColumn(name=genre, required=False, 
size=80, truncate=False))) | +| audio_set | True | jsonb | [JSONColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L388-L454)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| set_position | True | integer | [IntegerColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L216-L256)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| alt_files | True | jsonb | [JSONColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L388-L454)(nullable=True, required=False, upsert_strategy=merge_jsonb_arrays) | +| filesize | True | integer | [IntegerColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L216-L256)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| license | False | character varying (50) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(name='license\_', nullable=False, required=True, upsert_strategy=newest_non_null, StringColumnProps(size=50, truncate=False)) | +| license_version | True | character varying (25) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=False, required=True, upsert_strategy=newest_non_null, StringColumnProps(size=25, truncate=False)) | +| creator | True | character varying (2000) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(size=2000, truncate=True)) | +| creator_url | True | character varying (2000) | [URLColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L550-L596)(nullable=True, required=False, upsert_strategy=newest_non_null, URLColumnProps(size=2000)) | +| title | True | character 
varying (5000) | [StringColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L457-L497)(nullable=True, required=False, upsert_strategy=newest_non_null, StringColumnProps(size=5000, truncate=True)) | +| meta_data | True | jsonb | [JSONColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L388-L454)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| tags | True | jsonb | [JSONColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L388-L454)(nullable=True, required=False, upsert_strategy=merge_jsonb_arrays) | +| watermarked | True | boolean | [BooleanColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L340-L385)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| last_synced_with_source | True | timestamp with time zone | [TimestampColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L520-L547)(nullable=True, required=False, upsert_strategy=newest_non_null) | +| removed_from_source | False | boolean | [BooleanColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L340-L385)(nullable=False, required=True, upsert_strategy=false) | +| standardized_popularity | True | double precision | [CalculatedColumn](https://github.com/WordPress/openverse/blob/main/catalog/dags/common/storage/columns.py#L259-L337)(nullable=True, required=False, upsert_strategy=newest_non_null) |