Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Snapshot schema 72 (#13873)
Browse files Browse the repository at this point in the history
Including another batch of fixes to the schema dump script
  • Loading branch information
David Robertson authored Sep 26, 2022
1 parent 41461fd commit 0a38c7e
Show file tree
Hide file tree
Showing 13 changed files with 2,165 additions and 22 deletions.
1 change: 1 addition & 0 deletions changelog.d/13873.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Create a new snapshot of the database schema.
60 changes: 46 additions & 14 deletions scripts-dev/make_full_schema.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ usage() {
echo " Defaults to 9999."
echo "-h"
echo " Display this help text."
echo ""
echo " NB: make sure to run this against the *oldest* supported version of postgres,"
echo " or else pg_dump might output non-backwards-compatible syntax."
}

SCHEMA_NUMBER="9999"
Expand Down Expand Up @@ -240,25 +243,54 @@ DROP TABLE user_directory_search_stat;

echo "Dumping SQLite3 schema..."

mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schema/$SCHEMA_NUMBER"
sqlite3 "$SQLITE_COMMON_DB" ".schema --indent" > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".schema --indent" > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".schema --indent" > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schemas/$SCHEMA_NUMBER"
sqlite3 "$SQLITE_COMMON_DB" ".schema" > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".schema" > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".schema" > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"

cleanup_pg_schema() {
sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d'
# Cleanup as follows:
# - Remove empty lines. pg_dump likes to output a lot of these.
# - Remove comment-only lines. pg_dump also likes to output a lot of these to visually
# separate tables etc.
# - Remove "public." prefix --- the schema name.
# - Remove "SET" commands. Last time I ran this, the output commands were
# SET statement_timeout = 0;
# SET lock_timeout = 0;
# SET idle_in_transaction_session_timeout = 0;
# SET client_encoding = 'UTF8';
# SET standard_conforming_strings = on;
# SET check_function_bodies = false;
# SET xmloption = content;
# SET client_min_messages = warning;
# SET row_security = off;
# SET default_table_access_method = heap;
# - Very carefully remove specific SELECT statements. We CANNOT blanket remove all
# SELECT statements because some of those have side-effects which we do want in the
# schema. Last time I ran this, the only SELECTS were
# SELECT pg_catalog.set_config('search_path', '', false);
# and
# SELECT pg_catalog.setval(text, bigint, bool);
# We do want to remove the former, but the latter is important. If the last argument
# is `true` or omitted, this marks the given integer as having been consumed and
# will NOT appear as the nextval.
sed -e '/^$/d' \
-e '/^--/d' \
-e 's/public\.//g' \
-e '/^SET /d' \
-e '/^SELECT pg_catalog.set_config/d'
}

echo "Dumping Postgres schema..."

pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"

echo "Done! Files dumped to: $OUTPUT_DIR"
8 changes: 8 additions & 0 deletions synapse/storage/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,14 @@ def execute(self, sql: str, *args: Any) -> None:
def executemany(self, sql: str, *args: Any) -> None:
self._do_execute(self.txn.executemany, sql, *args)

def executescript(self, sql: str) -> None:
if isinstance(self.database_engine, Sqlite3Engine):
self._do_execute(self.txn.executescript, sql) # type: ignore[attr-defined]
else:
raise NotImplementedError(
f"executescript only exists for sqlite driver, not {type(self.database_engine)}"
)

def _make_sql_one_line(self, sql: str) -> str:
"Strip newlines out of SQL so that the loggers in the DB are on one line"
return " ".join(line.strip() for line in sql.splitlines() if line.strip())
Expand Down
23 changes: 21 additions & 2 deletions synapse/storage/engines/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ class IncorrectDatabaseSetup(RuntimeError):


ConnectionType = TypeVar("ConnectionType", bound=Connection)
CursorType = TypeVar("CursorType", bound=Cursor)


class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta):
class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCMeta):
def __init__(self, module: DBAPI2Module, config: Mapping[str, Any]):
self.module = module

Expand Down Expand Up @@ -64,7 +65,7 @@ def check_database(
...

@abc.abstractmethod
def check_new_database(self, txn: Cursor) -> None:
def check_new_database(self, txn: CursorType) -> None:
"""Gets called when setting up a brand new database. This allows us to
apply stricter checks on new databases versus existing database.
"""
Expand Down Expand Up @@ -124,3 +125,21 @@ def attempt_to_set_isolation_level(
Note: This has no effect on SQLite3, as transactions are SERIALIZABLE by default.
"""
...

@staticmethod
@abc.abstractmethod
def executescript(cursor: CursorType, script: str) -> None:
"""Execute a chunk of SQL containing multiple semicolon-delimited statements.
This is not provided by DBAPI2, and so needs engine-specific support.
"""
...

@classmethod
def execute_script_file(cls, cursor: CursorType, filepath: str) -> None:
"""Execute a file containing multiple semicolon-delimited SQL statements.
This is not provided by DBAPI2, and so needs engine-specific support.
"""
with open(filepath, "rt") as f:
cls.executescript(cursor, f.read())
12 changes: 11 additions & 1 deletion synapse/storage/engines/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
logger = logging.getLogger(__name__)


class PostgresEngine(BaseDatabaseEngine[psycopg2.extensions.connection]):
class PostgresEngine(
BaseDatabaseEngine[psycopg2.extensions.connection, psycopg2.extensions.cursor]
):
def __init__(self, database_config: Mapping[str, Any]):
super().__init__(psycopg2, database_config)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
Expand Down Expand Up @@ -212,3 +214,11 @@ def attempt_to_set_isolation_level(
else:
isolation_level = self.isolation_level_map[isolation_level]
return conn.set_isolation_level(isolation_level)

@staticmethod
def executescript(cursor: psycopg2.extensions.cursor, script: str) -> None:
"""Execute a chunk of SQL containing multiple semicolon-delimited statements.
Psycopg2 seems happy to do this in DBAPI2's `execute()` function.
"""
cursor.execute(script)
21 changes: 20 additions & 1 deletion synapse/storage/engines/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from synapse.storage.database import LoggingDatabaseConnection


class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]):
class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]):
def __init__(self, database_config: Mapping[str, Any]):
super().__init__(sqlite3, database_config)

Expand Down Expand Up @@ -120,6 +120,25 @@ def attempt_to_set_isolation_level(
# All transactions are SERIALIZABLE by default in sqlite
pass

@staticmethod
def executescript(cursor: sqlite3.Cursor, script: str) -> None:
"""Execute a chunk of SQL containing multiple semicolon-delimited statements.
Python's built-in SQLite driver does not allow you to do this with DBAPI2's
`execute`:
> execute() will only execute a single SQL statement. If you try to execute more
> than one statement with it, it will raise a Warning. Use executescript() if
> you want to execute multiple SQL statements with one call.
Though the docs for `executescript` warn:
> If there is a pending transaction, an implicit COMMIT statement is executed
> first. No other implicit transaction control is performed; any transaction
> control must be added to sql_script.
"""
cursor.executescript(script)


# Following functions taken from: https://github.com/coleifer/peewee

Expand Down
8 changes: 4 additions & 4 deletions synapse/storage/prepare_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def _setup_new_database(
".sql." + specific
):
logger.debug("Applying schema %s", entry.absolute_path)
executescript(cur, entry.absolute_path)
database_engine.execute_script_file(cur, entry.absolute_path)

cur.execute(
"INSERT INTO schema_version (version, upgraded) VALUES (?,?)",
Expand Down Expand Up @@ -517,15 +517,15 @@ def _upgrade_existing_database(
UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
)
logger.info("Applying schema %s", relative_path)
executescript(cur, absolute_path)
database_engine.execute_script_file(cur, absolute_path)
elif ext == specific_engine_extension and root_name.endswith(".sql"):
# A .sql file specific to our engine; just read and execute it
if is_worker:
raise PrepareDatabaseException(
UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
)
logger.info("Applying engine-specific schema %s", relative_path)
executescript(cur, absolute_path)
database_engine.execute_script_file(cur, absolute_path)
elif ext in specific_engine_extensions and root_name.endswith(".sql"):
# A .sql file for a different engine; skip it.
continue
Expand Down Expand Up @@ -666,7 +666,7 @@ def _get_or_create_schema_state(
) -> Optional[_SchemaState]:
# Bluntly try creating the schema_version tables.
sql_path = os.path.join(schema_path, "common", "schema_version.sql")
executescript(txn, sql_path)
database_engine.execute_script_file(txn, sql_path)

txn.execute("SELECT version, upgraded FROM schema_version")
row = txn.fetchone()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
CREATE TABLE background_updates (
update_name text NOT NULL,
progress_json text NOT NULL,
depends_on text,
ordering integer DEFAULT 0 NOT NULL
);
ALTER TABLE ONLY background_updates
ADD CONSTRAINT background_updates_uniqueness UNIQUE (update_name);
6 changes: 6 additions & 0 deletions synapse/storage/schema/common/full_schemas/72/full.sql.sqlite
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
CREATE TABLE background_updates (
update_name text NOT NULL,
progress_json text NOT NULL,
depends_on text, ordering INT NOT NULL DEFAULT 0,
CONSTRAINT background_updates_uniqueness UNIQUE (update_name)
);
Loading

0 comments on commit 0a38c7e

Please sign in to comment.