From d3a9a4fb874105a550253821bf3052fce2a5d3b1 Mon Sep 17 00:00:00 2001 From: Alexandr Burdiyan Date: Sat, 2 Mar 2024 00:53:03 +0100 Subject: [PATCH] refactor(backend): define blob indexes with better locality --- backend/daemon/storage/migrations.go | 6 ++++++ backend/daemon/storage/schema.gensum | 2 +- backend/daemon/storage/schema.sql | 6 ++++++ backend/hyper/blockstore_test.go | 5 +++-- backend/hyper/hypersql/queries.gen.go | 5 ++++- backend/hyper/hypersql/queries.gensum | 4 ++-- backend/hyper/hypersql/queries.go | 5 ++++- 7 files changed, 26 insertions(+), 7 deletions(-) diff --git a/backend/daemon/storage/migrations.go b/backend/daemon/storage/migrations.go index c8da766cb..bd7dc6745 100644 --- a/backend/daemon/storage/migrations.go +++ b/backend/daemon/storage/migrations.go @@ -362,6 +362,12 @@ var migrations = []migration{ DELETE FROM kv WHERE key = 'last_reindex_time'; `)) }}, + {Version: "2024-03-01.03", Run: func(_ *Dir, conn *sqlite.Conn) error { + return sqlitex.ExecScript(conn, sqlfmt(` + CREATE INDEX blobs_metadata ON blobs (id, multihash, codec, size, insert_time); + CREATE INDEX blobs_metadata_by_hash ON blobs (multihash, codec, size, insert_time); + `)) + }}, } const ( diff --git a/backend/daemon/storage/schema.gensum b/backend/daemon/storage/schema.gensum index eb9e3d4ef..16ac5085b 100644 --- a/backend/daemon/storage/schema.gensum +++ b/backend/daemon/storage/schema.gensum @@ -1,2 +1,2 @@ -srcs: d768fdb89d3eb5d4bee55227ac36c42d +srcs: 31b832ee95aa3ac2dba0877815c96e4e outs: 6e8f9aaea92a324bcd6776afeaa0230c diff --git a/backend/daemon/storage/schema.sql b/backend/daemon/storage/schema.sql index c6a51681b..620562891 100644 --- a/backend/daemon/storage/schema.sql +++ b/backend/daemon/storage/schema.sql @@ -35,6 +35,12 @@ CREATE TABLE blobs ( insert_time INTEGER DEFAULT (strftime('%s', 'now')) NOT NULL ); +-- Index for better data locality when we need to iterate over blobs without their data. +-- Without the index loading the entire list of blobs into memory takes forever, +-- because SQLite has to read way too many pages skipping the actual blob data. +CREATE INDEX blobs_metadata ON blobs (id, multihash, codec, size, insert_time); +CREATE INDEX blobs_metadata_by_hash ON blobs (multihash, codec, size, insert_time); + -- Stores some relevant attributes for structural blobs, -- which are those blobs that we can understand more deeply than just an opaque blob. CREATE TABLE structural_blobs ( diff --git a/backend/hyper/blockstore_test.go b/backend/hyper/blockstore_test.go index 7118394fb..19fd66de0 100644 --- a/backend/hyper/blockstore_test.go +++ b/backend/hyper/blockstore_test.go @@ -16,6 +16,7 @@ import ( format "github.com/ipfs/go-ipld-format" "github.com/multiformats/go-multihash" "github.com/stretchr/testify/require" + "golang.org/x/exp/slices" ) func TestGet(t *testing.T) { @@ -154,12 +155,12 @@ func TestAllKeysRespectsContext(t *testing.T) { // consume 2, then cancel context. v, ok := <-ch - require.Equal(t, keys[0], v) require.True(t, ok) + require.True(t, slices.Contains(keys, v)) v, ok = <-ch - require.Equal(t, keys[1], v) require.True(t, ok) + require.True(t, slices.Contains(keys, v)) cancel() diff --git a/backend/hyper/hypersql/queries.gen.go b/backend/hyper/hypersql/queries.gen.go index 429557520..2835a7975 100644 --- a/backend/hyper/hypersql/queries.gen.go +++ b/backend/hyper/hypersql/queries.gen.go @@ -196,7 +196,10 @@ type BlobsListKnownResult struct { func BlobsListKnown(conn *sqlite.Conn) ([]BlobsListKnownResult, error) { const query = `SELECT blobs.id, blobs.multihash, blobs.codec FROM blobs -WHERE blobs.size >= 0` +LEFT JOIN drafts ON drafts.blob = blobs.id +WHERE blobs.size >= 0 +AND drafts.blob IS NULL +ORDER BY blobs.id` var out []BlobsListKnownResult diff --git a/backend/hyper/hypersql/queries.gensum b/backend/hyper/hypersql/queries.gensum index eae7c003d..041a16c7d 100644 --- a/backend/hyper/hypersql/queries.gensum +++ b/backend/hyper/hypersql/queries.gensum @@ -1,2 +1,2 @@ -srcs: 710260753f9d777d05b98094175d347d -outs: 9427f9db7ecb4403869c9cd98100e1cc +srcs: 800af070fac742f44f90f3142c9ffcdd +outs: ed8e2acc2115513302d723aab1d251e1 diff --git a/backend/hyper/hypersql/queries.go b/backend/hyper/hypersql/queries.go index 48cb88d54..0d9f5b9d0 100644 --- a/backend/hyper/hypersql/queries.go +++ b/backend/hyper/hypersql/queries.go @@ -67,7 +67,10 @@ func generateQueries() error { s.BlobsCodec, ), '\n', "FROM", s.Blobs, '\n', - "WHERE", s.BlobsSize, ">=", "0", + "LEFT JOIN", s.Drafts, "ON", s.DraftsBlob, "=", s.BlobsID, '\n', + "WHERE", s.BlobsSize, ">=", "0", '\n', + "AND", s.DraftsBlob, "IS NULL", '\n', + "ORDER BY", s.BlobsID, ), qb.MakeQuery(s.Schema, "BlobLinksInsertOrIgnore", sgen.QueryKindExec,