From f17e82b8403a4d03225ac8b03d3e43534802650c Mon Sep 17 00:00:00 2001 From: Henry Tsai Date: Fri, 12 Jul 2024 15:49:19 -0700 Subject: [PATCH] Added critical indexes to Message Store tables (#46) - Added critical indexes to Message Store tables - Changed boolean value -> string conversion to 0/1 instead to align with SQL design guidelines and have faster lookup - Added limits to column lengths (required by MySQL for indexed columns) - Removed obsolete columns in Message Store tables - Added QoL `launch.json` for `VSC` for one-click debugging --- .vscode/launch.json | 18 +++++ package-lock.json | 8 +- package.json | 2 +- sql-dialect-variations.md | 5 ++ src/event-log-sql.ts | 54 ++++++------- src/message-store-sql.ts | 156 +++++++++++++++++++++++++------------- src/types.ts | 23 ++---- src/utils/sanitize.ts | 18 +++-- 8 files changed, 172 insertions(+), 112 deletions(-) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..a4c2b03 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,18 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "node", + "request": "launch", + "name": "Tests", + "runtimeExecutable": "${workspaceRoot}/node_modules/.bin/mocha", + "runtimeArgs": [ + "tests/compiled/**/*.spec.js" + ], + "console": "internalConsole" + } + ] +} diff --git a/package-lock.json b/package-lock.json index 8a308c2..02dda1d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,7 +10,7 @@ "license": "Apache-2.0", "dependencies": { "@ipld/dag-cbor": "9.0.5", - "@tbd54566975/dwn-sdk-js": "0.4.1", + "@tbd54566975/dwn-sdk-js": "0.4.2", "kysely": "0.26.3", "multiformats": "12.0.1", "readable-stream": "4.4.2" @@ -908,9 +908,9 @@ "dev": true }, "node_modules/@tbd54566975/dwn-sdk-js": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@tbd54566975/dwn-sdk-js/-/dwn-sdk-js-0.4.1.tgz", - "integrity": "sha512-AkB0pl5LunxTwBRN/q+jgSRRRkT/6TDqr6SSMLTocXnSqptdTvSymDmGJ3/8d75YNvRsSgAuiVeKA9NoTJG3DQ==", + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@tbd54566975/dwn-sdk-js/-/dwn-sdk-js-0.4.2.tgz", + "integrity": "sha512-ABwu6RMHDtTcRbI+b9AyqC3l7Hf50TJhbBaCMax9o/IV7KtM0RyMMBka5M7eGzQyHgWISykdUGW59yhe7/Z5Ig==", "dependencies": { "@ipld/dag-cbor": "9.0.3", "@js-temporal/polyfill": "0.4.4", diff --git a/package.json b/package.json index e851034..619a0bd 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,7 @@ "react-native": "./dist/esm/src/main.js", "dependencies": { "@ipld/dag-cbor": "9.0.5", - "@tbd54566975/dwn-sdk-js": "0.4.1", + "@tbd54566975/dwn-sdk-js": "0.4.2", "kysely": "0.26.3", "multiformats": "12.0.1", "readable-stream": "4.4.2" diff --git a/sql-dialect-variations.md b/sql-dialect-variations.md index 88743e5..aa52537 100644 --- a/sql-dialect-variations.md +++ b/sql-dialect-variations.md @@ -10,6 +10,8 @@ We use `Kysely` to help us abstract differences between SQL variants in this cod - Does not support "if not exists" syntax when creating indexes, workaround is to create index only on a newly created table. +- Requires length specified in a column for it to be indexable. The work around is easy however: use 'varchar()' when declaring the column. + ## PostgreSQL - Uses a special type: `serial` for auto-increment columns. @@ -18,3 +20,6 @@ We use `Kysely` to help us abstract differences between SQL variants in this cod - `bigint` column type gets returned as `string` in `pg` library. +## SQLite + +- `sqlite3` we use in `Kysely` does not support insertion of boolean values, as a result we need to convert boolean values into integers in DB queries. \ No newline at end of file diff --git a/src/event-log-sql.ts b/src/event-log-sql.ts index b67c0f0..055a1fd 100644 --- a/src/event-log-sql.ts +++ b/src/event-log-sql.ts @@ -27,40 +27,32 @@ export class EventLogSql implements EventLog { let createTable = this.#db.schema .createTable('eventLogMessages') .ifNotExists() - .addColumn('tenant', 'text', (col) => col.notNull()) + .addColumn('tenant', 'varchar(100)', (col) => col.notNull()) .addColumn('messageCid', 'varchar(60)', (col) => col.notNull()) - // "indexes" start - .addColumn('interface', 'text') - .addColumn('method', 'text') - .addColumn('schema', 'text') - .addColumn('dataCid', 'text') + .addColumn('interface', 'varchar(20)') + .addColumn('method', 'varchar(20)') + .addColumn('recordId', 'varchar(60)') + .addColumn('entryId','varchar(60)') + .addColumn('parentId', 'varchar(60)') + .addColumn('protocol', 'varchar(200)') + .addColumn('protocolPath', 'varchar(200)') + .addColumn('contextId', 'varchar(500)') + .addColumn('schema', 'varchar(200)') + .addColumn('author', 'varchar(100)') + .addColumn('recipient', 'varchar(100)') + .addColumn('messageTimestamp', 'varchar(30)') + .addColumn('dateCreated', 'varchar(30)') + .addColumn('datePublished', 'varchar(30)') + .addColumn('isLatestBaseState', 'boolean') + .addColumn('published', 'boolean') + .addColumn('prune', 'boolean') + .addColumn('dataFormat', 'varchar(30)') + .addColumn('dataCid', 'varchar(60)') .addColumn('dataSize', 'integer') - .addColumn('dateCreated', 'text') - .addColumn('delegated', 'text') - .addColumn('messageTimestamp', 'text') - .addColumn('dataFormat', 'text') - .addColumn('isLatestBaseState', 'text') - .addColumn('published', 'text') - .addColumn('author', 'text') - .addColumn('recordId', 'text') - .addColumn('entryId', 'text') - .addColumn('datePublished', 'text') - .addColumn('latest', 'text') - .addColumn('protocol', 'text') - .addColumn('dateExpires', 'text') - .addColumn('description', 'text') - .addColumn('grantedTo', 'text') - .addColumn('grantedBy', 'text') - .addColumn('grantedFor', 'text') - .addColumn('permissionsRequestId', 'text') + // .addColumn('encodedData', 'text') // intentionally kept commented out code to show the only difference to `messageStoreMessages` table .addColumn('attester', 'text') - .addColumn('protocolPath', 'text') - .addColumn('recipient', 'text') - .addColumn('contextId', 'text') - .addColumn('parentId', 'text') - .addColumn('permissionGrantId', 'text') - .addColumn('prune', 'text'); - // "indexes" end + .addColumn('permissionGrantId', 'varchar(60)') + .addColumn('latest', 'text'); // TODO: obsolete, remove once `dwn-sdk-js` is updated let createRecordsTagsTable = this.#db.schema .createTable('eventLogRecordsTags') diff --git a/src/message-store-sql.ts b/src/message-store-sql.ts index 28653a9..fce7a60 100644 --- a/src/message-store-sql.ts +++ b/src/message-store-sql.ts @@ -40,60 +40,108 @@ export class MessageStoreSql implements MessageStore { } this.#db = new Kysely({ dialect: this.#dialect }); - let createTable = this.#db.schema - .createTable('messageStoreMessages') - .ifNotExists() - .addColumn('tenant', 'varchar(255)', (col) => col.notNull()) - .addColumn('messageCid', 'varchar(60)', (col) => col.notNull()) - .addColumn('encodedData', 'text') // we optionally store encoded data if it is below a threshold - // "indexes" start - .addColumn('interface', 'text') - .addColumn('method', 'text') - .addColumn('schema', 'text') - .addColumn('dataCid', 'text') - .addColumn('dataSize', 'integer') - .addColumn('dateCreated', 'text') - .addColumn('delegated', 'text') - .addColumn('messageTimestamp', 'text') - .addColumn('dataFormat', 'text') - .addColumn('isLatestBaseState', 'text') - .addColumn('published', 'text') - .addColumn('author', 'text') - .addColumn('recordId', 'varchar(60)') - .addColumn('entryId', 'text') - .addColumn('datePublished', 'text') - .addColumn('latest', 'text') - .addColumn('protocol', 'text') - .addColumn('dateExpires', 'text') - .addColumn('description', 'text') - .addColumn('grantedTo', 'text') - .addColumn('grantedBy', 'text') - .addColumn('grantedFor', 'text') - .addColumn('permissionsRequestId', 'text') - .addColumn('attester', 'text') - .addColumn('protocolPath', 'text') - .addColumn('recipient', 'text') - .addColumn('contextId', 'text') - .addColumn('parentId', 'text') - .addColumn('permissionGrantId', 'text') - .addColumn('prune', 'text'); - // "indexes" end - - let createRecordsTagsTable = this.#db.schema - .createTable('messageStoreRecordsTags') - .ifNotExists() - .addColumn('tag', 'text', (col) => col.notNull()) - .addColumn('valueString', 'text') - .addColumn('valueNumber', 'decimal'); - - // Add columns that have dialect-specific constraints - createTable = this.#dialect.addAutoIncrementingColumn(createTable, 'id', (col) => col.primaryKey()); - createTable = this.#dialect.addBlobColumn(createTable, 'encodedMessageBytes', (col) => col.notNull()); - createRecordsTagsTable = this.#dialect.addAutoIncrementingColumn(createRecordsTagsTable, 'id', (col) => col.primaryKey()); - createRecordsTagsTable = this.#dialect.addReferencedColumn(createRecordsTagsTable, 'messageStoreRecordsTags', 'messageInsertId', 'integer', 'messageStoreMessages', 'id', 'cascade'); - - await createTable.execute(); - await createRecordsTagsTable.execute(); + + // create messages table if it does not exist + const messagesTableName = 'messageStoreMessages'; + const messagesTableExists = await this.#dialect.hasTable(this.#db, messagesTableName); + if (!messagesTableExists) { + let createMessagesTable = this.#db.schema + .createTable(messagesTableName) + .ifNotExists() + .addColumn('tenant', 'varchar(100)', (col) => col.notNull()) + .addColumn('messageCid', 'varchar(60)', (col) => col.notNull()) + .addColumn('interface', 'varchar(20)') + .addColumn('method', 'varchar(20)') + .addColumn('recordId', 'varchar(60)') + .addColumn('entryId','varchar(60)') + .addColumn('parentId', 'varchar(60)') + .addColumn('protocol', 'varchar(200)') + .addColumn('protocolPath', 'varchar(200)') + .addColumn('contextId', 'varchar(500)') + .addColumn('schema', 'varchar(200)') + .addColumn('author', 'varchar(100)') + .addColumn('recipient', 'varchar(100)') + .addColumn('messageTimestamp', 'varchar(30)') + .addColumn('dateCreated', 'varchar(30)') + .addColumn('datePublished', 'varchar(30)') + .addColumn('isLatestBaseState', 'boolean') + .addColumn('published', 'boolean') + .addColumn('prune', 'boolean') + .addColumn('dataFormat', 'varchar(30)') + .addColumn('dataCid', 'varchar(60)') + .addColumn('dataSize', 'integer') + .addColumn('encodedData', 'text') // we optionally store encoded data if it is below a threshold + .addColumn('attester', 'text') + .addColumn('permissionGrantId', 'varchar(60)') + .addColumn('latest', 'text'); // TODO: obsolete, remove once `dwn-sdk-js` tests are updated + + // Add columns that have dialect-specific constraints + createMessagesTable = this.#dialect.addAutoIncrementingColumn(createMessagesTable, 'id', (col) => col.primaryKey()); + createMessagesTable = this.#dialect.addBlobColumn(createMessagesTable, 'encodedMessageBytes', (col) => col.notNull()); + await createMessagesTable.execute(); + + // add indexes to the table + await this.createIndexes(this.#db, messagesTableName, [ + ['tenant'], // baseline protection to prevent full table scans across all tenants + ['tenant', 'recordId'], // multiple uses, notably heavily depended by record chain construction for protocol authorization + ['tenant', 'parentId'], // used to walk down hierarchy of records, use cases include purging of records + ['tenant', 'protocol', 'published', 'messageTimestamp'], // index used for basically every external query. + ['tenant', 'interface'], // mainly for fast fetch of ProtocolsConfigure for authorization, not needed if protocol was a DWN Record + ['tenant', 'contextId', 'messageTimestamp'], // expected to be used for common query pattern + ['tenant', 'permissionGrantId'], // for deleting grant-authorized messages though pending https://github.com/TBD54566975/dwn-sdk-js/issues/716 + // other potential indexes + // ['tenant', 'author'], + // ['tenant', 'recipient'], + // ['tenant', 'schema', 'dataFormat'], + // ['tenant', 'dateCreated'], + // ['tenant', 'datePublished'], + // ['tenant', 'messageCid'], + // ['tenant', 'protocolPath'], + ]); + } + + // create tags table + const tagsTableName = 'messageStoreRecordsTags'; + const tagsTableExists = await this.#dialect.hasTable(this.#db, tagsTableName); + if (!tagsTableExists) { + let createRecordsTagsTable = this.#db.schema + .createTable(tagsTableName) + .ifNotExists() + .addColumn('tag', 'varchar(30)', (col) => col.notNull()) + .addColumn('valueString', 'varchar(200)') + .addColumn('valueNumber', 'decimal'); + + // Add columns that have dialect-specific constraints + const foreignMessageInsertId = 'messageInsertId'; + createRecordsTagsTable = this.#dialect.addAutoIncrementingColumn(createRecordsTagsTable, 'id', (col) => col.primaryKey()); + createRecordsTagsTable = this.#dialect.addReferencedColumn(createRecordsTagsTable, tagsTableName, foreignMessageInsertId, 'integer', 'messageStoreMessages', 'id', 'cascade'); + await createRecordsTagsTable.execute(); + + // add indexes to the table + await this.createIndexes(this.#db, tagsTableName, [ + [foreignMessageInsertId], + ['tag', 'valueString'], + ['tag', 'valueNumber'] + ]); + } + } + + /** + * Creates indexes on the given table. + * @param tableName The name of the table to create the indexes on. + * @param indexes Each inner array represents a single index and contains the column names to be indexed as a composite index. + * If the inner array contains only one element, it will be treated as a single column index. + */ + async createIndexes(database: Kysely, tableName: string, indexes: string[][]): Promise { + for (const columnNames of indexes) { + const indexName = 'index_' + columnNames.join('_'); // e.g. index_tenant_protocol + await database.schema + .createIndex(indexName) + // .ifNotExists() // intentionally kept commented out code to show that it is not supported by all dialects (ie. MySQL) + .on(tableName) + .columns(columnNames) + .execute(); + } } async close(): Promise { diff --git a/src/types.ts b/src/types.ts index 9a4f655..eec9def 100644 --- a/src/types.ts +++ b/src/types.ts @@ -16,19 +16,14 @@ type EventLogTable = { dateCreated: string | null; messageTimestamp: string | null; dataFormat: string | null; - isLatestBaseState: string | null; - published: string | null; + isLatestBaseState: boolean | null; + published: boolean | null; author: string | null; recordId: string | null; entryId: string | null; datePublished: string | null; latest: string | null; protocol: string | null; - dateExpires: string | null; - description: string | null; - grantedTo: string | null; - grantedBy: string | null; - grantedFor: string | null; permissionsRequestId: string | null; attester: string | null; protocolPath: string | null; @@ -36,7 +31,7 @@ type EventLogTable = { contextId: string | null; parentId: string | null; permissionGrantId: string | null; - prune: string | null; + prune: boolean | null; // "indexes" end } @@ -55,19 +50,13 @@ type MessageStoreTable = { dateCreated: string | null; messageTimestamp: string | null; dataFormat: string | null; - isLatestBaseState: string | null; - published: string | null; + isLatestBaseState: boolean | null; + published: boolean | null; author: string | null; recordId: string | null; entryId: string | null; datePublished: string | null; - latest: string | null; protocol: string | null; - dateExpires: string | null; - description: string | null; - grantedTo: string | null; - grantedBy: string | null; - grantedFor: string | null; permissionsRequestId: string | null; attester: string | null; protocolPath: string | null; @@ -75,7 +64,7 @@ type MessageStoreTable = { contextId: string | null; parentId: string | null; permissionGrantId: string | null; - prune: string | null; + prune: boolean | null; // "indexes" end } diff --git a/src/utils/sanitize.ts b/src/utils/sanitize.ts index 5543c87..9bbf53c 100644 --- a/src/utils/sanitize.ts +++ b/src/utils/sanitize.ts @@ -40,12 +40,14 @@ export function sanitizeIndexes(records: KeyValues) { } } -// we sanitize the incoming value into a string or number -// sqlite3 and the driver we use does not support booleans, so we convert them to strings +/** + * Sanitizes the given value into a string or number. + * NOTE: sqlite3 we use does not support inserting boolean values, so we convert them to a number. + */ export function sanitizedValue(value: string | number | boolean): string | number { switch (typeof value) { case 'boolean': - return String(value); + return value ? 1 : 0; default: return value; } @@ -85,12 +87,18 @@ export function sanitizeFiltersAndSeparateTags(filters: Filter[]): { return extractedFilters; } -// we sanitize the filter value for a string representation of the boolean +// we sanitize the filter value for a number representation of the boolean + +/** + * Sanitizes the given filter value to align with the value conversions done during insertions/updates. + * NOTE: sqlite3 we use does not support inserting boolean values, + * so we convert them to a number during insertions/updates, as a result we need to align the filter values in queries. +*/ // TODO: export filter types from `dwn-sdk-js` export function sanitizeFilterValue(value: any): any { switch (typeof value) { case 'boolean': - return String(value); + return value ? 1 : 0; default: return value; }