chore: add pinning benchmark (#633)

Split a new benchmark out of the gc one that just focusses on pinning performance.
ipfs · Sep 16, 2024 · 8805202 · 8805202
1 parent 9de08ef
commit 8805202
Show file tree

Hide file tree

Showing 20 changed files with 473 additions and 122 deletions.
diff --git a/benchmarks/add-dir/package.json b/benchmarks/add-dir/package.json
@@ -11,25 +11,20 @@
     "dep-check": "aegir dep-check",
     "start": "npm run build && node dist/src/index.js"
   },
-  "devDependencies": {
-    "@chainsafe/libp2p-noise": "^15.0.0",
-    "@chainsafe/libp2p-yamux": "^6.0.1",
+  "dependencies": {
     "@helia/unixfs": "^3.0.4",
-    "@ipld/dag-pb": "^4.0.2",
-    "@libp2p/websockets": "^8.0.9",
     "aegir": "^44.0.1",
-    "blockstore-fs": "^1.0.1",
-    "datastore-level": "^10.0.1",
+    "blockstore-core": "^5.0.2",
+    "blockstore-fs": "^2.0.1",
+    "datastore-core": "^10.0.2",
+    "datastore-level": "^11.0.1",
     "execa": "^8.0.1",
     "helia": "^4.1.1",
     "ipfs-unixfs-importer": "^15.1.5",
     "ipfsd-ctl": "^15.0.0",
-    "it-all": "^3.0.4",
-    "it-drain": "^3.0.5",
-    "it-map": "^3.0.5",
-    "kubo": "^0.28.0",
+    "it-last": "^3.0.6",
+    "kubo": "^0.30.0",
     "kubo-rpc-client": "^5.0.0",
-    "libp2p": "^1.1.0",
     "multiformats": "^13.1.0",
     "tinybench": "^2.4.0"
   }

diff --git a/benchmarks/add-dir/src/index.ts b/benchmarks/add-dir/src/index.ts
@@ -1,24 +1,22 @@
 /* eslint-disable no-console,no-loop-func */
 
 import nodePath from 'node:path'
-import debug from 'debug'
 import { CID } from 'multiformats/cid'
 import { Bench } from 'tinybench'
 import { createHeliaBenchmark } from './helia.js'
 import { createKuboDirectBenchmark } from './kubo-direct.js'
 import { createKuboBenchmark } from './kubo.js'
 
-const log = debug('bench:add-dir')
 const ITERATIONS = parseInt(process.env.ITERATIONS ?? '5')
 const MIN_TIME = parseInt(process.env.MIN_TIME ?? '1')
 const TEST_PATH = process.env.TEST_PATH
 const RESULT_PRECISION = 2
 
 export interface AddDirBenchmark {
-  teardown: () => Promise<void>
-  addFile?: (path: string) => Promise<CID>
-  addDir: (path: string) => Promise<CID>
-  getSize?: (cid: CID) => Promise<bigint>
+  teardown(): Promise<void>
+  addFile?(path: string): Promise<CID>
+  addDir(path: string): Promise<CID>
+  getSize?(cid: CID): Promise<bigint>
 }
 
 interface BenchmarkTaskResult {
@@ -33,7 +31,7 @@ const getDefaultResults = (): BenchmarkTaskResult => ({
   sizes: new Map<string, Set<string>>()
 })
 
-const impls: Array<{ name: string, create: () => Promise<AddDirBenchmark>, results: BenchmarkTaskResult }> = [
+const impls: Array<{ name: string, create(): Promise<AddDirBenchmark>, results: BenchmarkTaskResult }> = [
   {
     name: 'helia-fs',
     create: async () => createHeliaBenchmark(),
@@ -63,19 +61,15 @@ async function main (): Promise<void> {
     iterations: ITERATIONS,
     time: MIN_TIME,
     setup: async (task) => {
-      log('Start: setup')
       const impl = impls.find(({ name }) => task.name.includes(name))
       if (impl != null) {
         subject = await impl.create()
       } else {
         throw new Error(`No implementation with name '${task.name}'`)
       }
-      log('End: setup')
     },
     teardown: async () => {
-      log('Start: teardown')
       await subject.teardown()
-      log('End: teardown')
     }
   })
 
@@ -99,11 +93,7 @@ async function main (): Promise<void> {
         impl.results.cids.set(testPath, cidSet)
       },
       {
-        beforeEach: async () => {
-          log(`Start: test ${impl.name}`)
-        },
         afterEach: async () => {
-          log(`End: test ${impl.name}`)
           const cidSet = impl.results.cids.get(testPath)
           if (cidSet != null) {
             for (const cid of cidSet.values()) {

diff --git a/benchmarks/add-dir/src/kubo-direct.ts b/benchmarks/add-dir/src/kubo-direct.ts
@@ -1,9 +1,9 @@
-import { execa } from 'execa'
-import { path as kuboPath } from 'kubo'
-import { CID } from 'multiformats/cid'
 import { promises as fsPromises } from 'node:fs'
 import os from 'node:os'
 import nodePath from 'node:path'
+import { execa } from 'execa'
+import { path as kuboPath } from 'kubo'
+import { CID } from 'multiformats/cid'
 import type { AddDirBenchmark } from './index.js'
 
 export async function createKuboDirectBenchmark (): Promise<AddDirBenchmark> {
@@ -15,7 +15,7 @@ export async function createKuboDirectBenchmark (): Promise<AddDirBenchmark> {
     const { stdout } = await execa(kuboPath(), ['--repo-dir', repoDir, 'add', '-r', '--pin=false', dir])
     const lines = stdout.split('\n')
     const lastLine = lines.pop()
-    const cid = CID.parse(lastLine?.split(' ')[1] as string)
+    const cid = CID.parse(lastLine?.split(' ')[1] ?? '')
 
     return cid
   }

diff --git a/benchmarks/add-dir/src/kubo.ts b/benchmarks/add-dir/src/kubo.ts
@@ -1,11 +1,11 @@
+import fs, { promises as fsPromises } from 'node:fs'
+import nodePath from 'node:path'
 import { createNode } from 'ipfsd-ctl'
 import last from 'it-last'
 import { path as kuboPath } from 'kubo'
 import { globSource, create as kuboRpcClient } from 'kubo-rpc-client'
-import type { CID } from 'multiformats/cid'
-import fs, { promises as fsPromises } from 'node:fs'
-import nodePath from 'node:path'
 import type { AddDirBenchmark } from './index.js'
+import type { CID } from 'multiformats/cid'
 
 export async function createKuboBenchmark (): Promise<AddDirBenchmark> {
   const controller = await createNode({
@@ -19,7 +19,7 @@ export async function createKuboBenchmark (): Promise<AddDirBenchmark> {
   })
 
   const addFile = async (path: string): Promise<CID> => (await controller.api.add({
-    path: nodePath.relative(process.cwd(),path),
+    path: nodePath.relative(process.cwd(), path),
     content: fs.createReadStream(path)
   }, {
     cidVersion: 1,

diff --git a/benchmarks/gc/package.json b/benchmarks/gc/package.json
@@ -9,18 +9,18 @@
     "dep-check": "aegir dep-check",
     "start": "npm run build && node dist/src/index.js"
   },
-  "devDependencies": {
+  "dependencies": {
     "@ipld/dag-pb": "^4.0.6",
     "aegir": "^44.0.1",
-    "blockstore-fs": "^1.1.8",
-    "datastore-level": "^10.1.5",
+    "blockstore-fs": "^2.0.1",
+    "datastore-level": "^11.0.1",
     "execa": "^8.0.1",
     "helia": "^4.1.1",
     "ipfsd-ctl": "^15.0.0",
     "it-all": "^3.0.4",
     "it-drain": "^3.0.5",
     "it-map": "^3.0.5",
-    "kubo": "^0.28.0",
+    "kubo": "^0.30.0",
     "kubo-rpc-client": "^5.0.0",
     "multiformats": "^13.0.0",
     "tinybench": "^2.5.1"

diff --git a/benchmarks/gc/src/index.ts b/benchmarks/gc/src/index.ts
@@ -14,13 +14,13 @@ const ITERATIONS = parseInt(process.env.ITERATIONS ?? '5')
 const RESULT_PRECISION = 2
 
 export interface GcBenchmark {
-  gc: () => Promise<void>
-  teardown: () => Promise<void>
-  pin: (cid: CID) => Promise<void>
-  putBlocks: (blocks: Array<{ key: CID, value: Uint8Array }>) => Promise<void>
-  clearPins: () => Promise<number>
-  isPinned: (cid: CID) => Promise<boolean>
-  hasBlock: (cid: CID) => Promise<boolean>
+  gc(): Promise<void>
+  teardown(): Promise<void>
+  pin(cid: CID): Promise<void>
+  putBlocks(blocks: Array<{ key: CID, value: Uint8Array }>): Promise<void>
+  clearPins(): Promise<number>
+  isPinned(cid: CID): Promise<boolean>
+  hasBlock(cid: CID): Promise<boolean>
 }
 
 const blocks: Array<{ key: CID, value: Uint8Array }> = []
@@ -91,7 +91,7 @@ async function pinBlocks (benchmark: GcBenchmark): Promise<void> {
   }
 }
 
-const impls: Array<{ name: string, create: () => Promise<GcBenchmark>, results: { gc: number[], clearedPins: number[], addedBlocks: number[], pinnedBlocks: number[] } }> = [{
+const impls: Array<{ name: string, create(): Promise<GcBenchmark>, results: { gc: number[], clearedPins: number[], addedBlocks: number[], pinnedBlocks: number[] } }> = [{
   name: 'helia',
   create: async () => createHeliaBenchmark(),
   results: {

diff --git a/benchmarks/pinning/README.md b/benchmarks/pinning/README.md
@@ -0,0 +1,45 @@
+# Pinning Benchmark
+
+Benchmarks Helia pinning performance against Kubo
+
+- Removes any existing pins
+- Creates 10000 DAGs, each made up of a pinned root node that links to two child nodes
+
+All three implementations use on-disk block/datastores to ensure a reasonable basis for comparison.
+
+Warning! It can take a long time with realistic pinset sizes - on the order of a whole day.
+
+To run:
+
+1. Add `benchmarks/*` to the `workspaces` entry in the root `package.json` of this repo
+2. Run
+    ```console
+    $ npm run reset
+    $ npm i
+    $ npm run build
+    $ cd benchmarks/pinning
+    $ npm start
+
+    > benchmarks-pinning@1.0.0 start
+    > npm run build && node dist/src/index.js
+
+
+    > benchmarks-pinning@1.0.0 build
+    > aegir build --bundle false
+
+    [14:51:28] tsc [started]
+    [14:51:33] tsc [completed]
+    generating Ed25519 keypair...
+    ┌─────────┬────────────────┬─────────┬───────────┬──────┐
+    │ (index) │ Implementation │  ops/s  │   ms/op   │ runs │
+    ├─────────┼────────────────┼─────────┼───────────┼──────┤
+    //... results here
+    ```
+
+## Graph
+
+To output stats for a graph, run:
+
+```console
+$ npm run build && node dist/src/graph.js
+```
diff --git a/benchmarks/pinning/package.json b/benchmarks/pinning/package.json
@@ -0,0 +1,29 @@
+{
+  "name": "benchmarks-pinning",
+  "version": "1.0.0",
+  "type": "module",
+  "scripts": {
+    "clean": "aegir clean",
+    "build": "aegir build --bundle false",
+    "lint": "aegir lint",
+    "dep-check": "aegir dep-check",
+    "start": "npm run build && node dist/src/index.js"
+  },
+  "dependencies": {
+    "@ipld/dag-pb": "^4.0.6",
+    "aegir": "^44.0.1",
+    "blockstore-fs": "^2.0.1",
+    "datastore-level": "^11.0.1",
+    "execa": "^8.0.1",
+    "helia": "^4.1.1",
+    "ipfsd-ctl": "^15.0.0",
+    "it-all": "^3.0.4",
+    "it-drain": "^3.0.5",
+    "it-map": "^3.0.5",
+    "kubo": "^0.30.0",
+    "kubo-rpc-client": "^5.0.0",
+    "multiformats": "^13.0.0",
+    "tinybench": "^2.5.1"
+  },
+  "private": true
+}
diff --git a/benchmarks/pinning/src/graph.ts b/benchmarks/pinning/src/graph.ts
@@ -0,0 +1,18 @@
+import { execa } from 'execa'
+
+const ITERATIONS = 2
+const INCREMENT = 1000
+const MAX = 10000
+
+// Runs the built benchmark entry point once per step, one run at a time.
+// Each child process inherits the current environment plus three overrides:
+// INCREMENT (step * INCREMENT), ITERATIONS and ITERATION (the step number).
+// The child's stdout/stderr are passed straight through to this process.
+const totalSteps = MAX / INCREMENT
+
+for (let step = 1; step <= totalSteps; step++) {
+  const env = {
+    ...process.env,
+    INCREMENT: String(step * INCREMENT),
+    ITERATIONS: String(ITERATIONS),
+    ITERATION: String(step)
+  }
+
+  await execa('node', ['dist/src/index.js'], {
+    env,
+    stdout: 'inherit',
+    stderr: 'inherit'
+  })
+}
diff --git a/benchmarks/pinning/src/helia.ts b/benchmarks/pinning/src/helia.ts
@@ -0,0 +1,51 @@
+import os from 'node:os'
+import path from 'node:path'
+import { FsBlockstore } from 'blockstore-fs'
+import { LevelDatastore } from 'datastore-level'
+import { createHelia } from 'helia'
+import all from 'it-all'
+import drain from 'it-drain'
+import map from 'it-map'
+import type { GcBenchmark } from './index.js'
+
+/**
+ * Creates the Helia implementation of the benchmark.
+ *
+ * The node is backed by an `FsBlockstore` and a `LevelDatastore` that live
+ * under a randomly named directory inside the OS temp dir. It is configured
+ * with no libp2p listen addresses and is created with `start: false`.
+ *
+ * NOTE(review): `teardown` only stops the node - nothing in this function
+ * deletes the temporary repo directory. Confirm whether that is intentional.
+ */
+export async function createHeliaBenchmark (): Promise<GcBenchmark> {
+  const repoDir = path.join(os.tmpdir(), `helia-${Math.random()}`)
+
+  const node = await createHelia({
+    blockstore: new FsBlockstore(`${repoDir}/blocks`),
+    datastore: new LevelDatastore(`${repoDir}/data`),
+    libp2p: { addresses: { listen: [] } },
+    start: false
+  })
+
+  const benchmark: GcBenchmark = {
+    // writes every supplied block into the node's blockstore
+    async putBlocks (blocks) {
+      const pairs = map(blocks, (entry) => ({ cid: entry.key, block: entry.value }))
+
+      await drain(node.blockstore.putMany(pairs))
+    },
+
+    // pins the passed CID, consuming the whole result of `pins.add`
+    async pin (cid) {
+      await drain(node.pins.add(cid))
+    },
+
+    // stops the Helia node
+    async teardown () {
+      await node.stop()
+    },
+
+    // lists all pins up front, removes them one at a time and resolves to
+    // the number of pins that were listed
+    async clearPins () {
+      const existing = await all(node.pins.ls())
+
+      for (const { cid } of existing) {
+        await drain(node.pins.rm(cid))
+      }
+
+      return existing.length
+    },
+
+    // delegates to the node's pin store
+    async isPinned (cid) {
+      return node.pins.isPinned(cid)
+    },
+
+    // delegates to the node's blockstore
+    async hasBlock (cid) {
+      return node.blockstore.has(cid)
+    }
+  }
+
+  return benchmark
+}