Skip to content

Commit

Permalink
refactor(backend): define blob indexes with better locality
Browse files Browse the repository at this point in the history
  • Loading branch information
burdiyan committed Mar 1, 2024
1 parent 191beaf commit d3a9a4f
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 7 deletions.
6 changes: 6 additions & 0 deletions backend/daemon/storage/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,12 @@ var migrations = []migration{
DELETE FROM kv WHERE key = 'last_reindex_time';
`))
}},
{Version: "2024-03-01.03", Run: func(_ *Dir, conn *sqlite.Conn) error {
return sqlitex.ExecScript(conn, sqlfmt(`
CREATE INDEX blobs_metadata ON blobs (id, multihash, codec, size, insert_time);
CREATE INDEX blobs_metadata_by_hash ON blobs (multihash, codec, size, insert_time);
`))
}},
}

const (
Expand Down
2 changes: 1 addition & 1 deletion backend/daemon/storage/schema.gensum
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
srcs: d768fdb89d3eb5d4bee55227ac36c42d
srcs: 31b832ee95aa3ac2dba0877815c96e4e
outs: 6e8f9aaea92a324bcd6776afeaa0230c
6 changes: 6 additions & 0 deletions backend/daemon/storage/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ CREATE TABLE blobs (
insert_time INTEGER DEFAULT (strftime('%s', 'now')) NOT NULL
);

-- Indexes for better data locality when we need to iterate over blobs without their data.
-- Without these indexes, loading the entire list of blobs into memory takes forever,
-- because SQLite has to read far too many pages while skipping over the actual blob data.
-- Both indexes carry the multihash, codec, size, and insert_time columns:
-- blobs_metadata leads with id, while blobs_metadata_by_hash leads with multihash.
CREATE INDEX blobs_metadata ON blobs (id, multihash, codec, size, insert_time);
CREATE INDEX blobs_metadata_by_hash ON blobs (multihash, codec, size, insert_time);

-- Stores some relevant attributes for structural blobs,
-- which are those blobs that we can understand more deeply than just an opaque blob.
CREATE TABLE structural_blobs (
Expand Down
5 changes: 3 additions & 2 deletions backend/hyper/blockstore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
format "github.com/ipfs/go-ipld-format"
"github.com/multiformats/go-multihash"
"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"
)

func TestGet(t *testing.T) {
Expand Down Expand Up @@ -154,12 +155,12 @@ func TestAllKeysRespectsContext(t *testing.T) {

// consume 2, then cancel context.
v, ok := <-ch
require.Equal(t, keys[0], v)
require.True(t, ok)
require.True(t, slices.Contains(keys, v))

v, ok = <-ch
require.Equal(t, keys[1], v)
require.True(t, ok)
require.True(t, slices.Contains(keys, v))

cancel()

Expand Down
5 changes: 4 additions & 1 deletion backend/hyper/hypersql/queries.gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions backend/hyper/hypersql/queries.gensum
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
srcs: 710260753f9d777d05b98094175d347d
outs: 9427f9db7ecb4403869c9cd98100e1cc
srcs: 800af070fac742f44f90f3142c9ffcdd
outs: ed8e2acc2115513302d723aab1d251e1
5 changes: 4 additions & 1 deletion backend/hyper/hypersql/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ func generateQueries() error {
s.BlobsCodec,
), '\n',
"FROM", s.Blobs, '\n',
"WHERE", s.BlobsSize, ">=", "0",
"LEFT JOIN", s.Drafts, "ON", s.DraftsBlob, "=", s.BlobsID, '\n',
"WHERE", s.BlobsSize, ">=", "0", '\n',
"AND", s.DraftsBlob, "IS NULL", '\n',
"ORDER BY", s.BlobsID,
),

qb.MakeQuery(s.Schema, "BlobLinksInsertOrIgnore", sgen.QueryKindExec,
Expand Down

0 comments on commit d3a9a4f

Please sign in to comment.