diff --git a/packages/ipfs-unixfs-exporter/package.json b/packages/ipfs-unixfs-exporter/package.json index 397f3105..3f15f688 100644 --- a/packages/ipfs-unixfs-exporter/package.json +++ b/packages/ipfs-unixfs-exporter/package.json @@ -45,6 +45,7 @@ "it-all": "^1.0.1", "it-buffer-stream": "^1.0.2", "it-first": "^1.0.1", + "merge-options": "^3.0.3", "multicodec": "^2.0.0", "nyc": "^15.0.0", "sinon": "^9.0.1", diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt b/packages/ipfs-unixfs-exporter/test/fixtures/1.2MiB.txt similarity index 100% rename from packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt rename to packages/ipfs-unixfs-exporter/test/fixtures/1.2MiB.txt diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt b/packages/ipfs-unixfs-exporter/test/fixtures/200Bytes.txt similarity index 100% rename from packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt rename to packages/ipfs-unixfs-exporter/test/fixtures/200Bytes.txt diff --git a/packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js b/packages/ipfs-unixfs-exporter/test/helpers/collect-leaf-cids.js similarity index 100% rename from packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js rename to packages/ipfs-unixfs-exporter/test/helpers/collect-leaf-cids.js diff --git a/packages/ipfs-unixfs-exporter/test/helpers/create-shard.js b/packages/ipfs-unixfs-exporter/test/helpers/create-shard.js deleted file mode 100644 index 836ac9e9..00000000 --- a/packages/ipfs-unixfs-exporter/test/helpers/create-shard.js +++ /dev/null @@ -1,33 +0,0 @@ -'use strict' - -const importer = require('ipfs-unixfs-importer') - -const SHARD_SPLIT_THRESHOLD = 10 - -const createShard = (numFiles, ipld) => { - return createShardWithFileNames(numFiles, (index) => `file-${index}`, ipld) -} - -const createShardWithFileNames = (numFiles, fileName, ipld) => { - const files = new Array(numFiles).fill(0).map((_, index) => ({ - path: fileName(index), - content: Uint8Array.from([0, 1, 2, 3, 4, index]) - })) - - return createShardWithFiles(files, ipld) -} - -const createShardWithFiles = async (files, ipld) => { - let last - - for await (const imported of importer(ipld, files, { - shardSplitThreshold: SHARD_SPLIT_THRESHOLD, - wrap: true - })) { - last = imported - } - - return last.cid -} - -module.exports = createShard diff --git a/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js b/packages/ipfs-unixfs-exporter/test/import-export-dir-sharding.spec.js similarity index 99% rename from packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js rename to packages/ipfs-unixfs-exporter/test/import-export-dir-sharding.spec.js index 32ae8641..bc5af3b9 100644 --- a/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js +++ b/packages/ipfs-unixfs-exporter/test/import-export-dir-sharding.spec.js @@ -1,8 +1,8 @@ /* eslint-env mocha */ 'use strict' -const importer = require('../src') -const exporter = require('ipfs-unixfs-exporter') +const importer = require('ipfs-unixfs-importer') +const exporter = require('../src') const { expect } = require('aegir/utils/chai') const IPLD = require('ipld') diff --git a/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js b/packages/ipfs-unixfs-exporter/test/import-export-nested-dir.spec.js similarity index 97% rename from packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js rename to packages/ipfs-unixfs-exporter/test/import-export-nested-dir.spec.js index 79d747f3..1a5e7831 100644 --- 
a/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js +++ b/packages/ipfs-unixfs-exporter/test/import-export-nested-dir.spec.js @@ -5,8 +5,8 @@ const { expect } = require('aegir/utils/chai') const IPLD = require('ipld') const inMemory = require('ipld-in-memory') const all = require('it-all') -const importer = require('../src') -const exporter = require('ipfs-unixfs-exporter') +const importer = require('ipfs-unixfs-importer') +const exporter = require('../src') const blockApi = require('./helpers/block') const uint8ArrayFromString = require('uint8arrays/from-string') const uint8ArrayToString = require('uint8arrays/to-string') diff --git a/packages/ipfs-unixfs-importer/test/import-export.spec.js b/packages/ipfs-unixfs-exporter/test/import-export.spec.js similarity index 93% rename from packages/ipfs-unixfs-importer/test/import-export.spec.js rename to packages/ipfs-unixfs-exporter/test/import-export.spec.js index 0d436b46..d529b85d 100644 --- a/packages/ipfs-unixfs-importer/test/import-export.spec.js +++ b/packages/ipfs-unixfs-exporter/test/import-export.spec.js @@ -10,8 +10,8 @@ const isNode = require('detect-node') const bigFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1.2MiB.txt') const blockApi = require('./helpers/block') -const importer = require('../src') -const exporter = require('ipfs-unixfs-exporter') +const importer = require('ipfs-unixfs-importer') +const exporter = require('../src') const strategies = [ 'flat', diff --git a/packages/ipfs-unixfs-importer/test/importer.spec.js b/packages/ipfs-unixfs-exporter/test/importer.spec.js similarity index 99% rename from packages/ipfs-unixfs-importer/test/importer.spec.js rename to packages/ipfs-unixfs-exporter/test/importer.spec.js index 33696cb1..b67a954b 100644 --- a/packages/ipfs-unixfs-importer/test/importer.spec.js +++ b/packages/ipfs-unixfs-exporter/test/importer.spec.js @@ -1,8 +1,8 @@ /* eslint-env mocha */ 'use strict' -const importer = require('../src') -const exporter = require('ipfs-unixfs-exporter') +const importer = require('ipfs-unixfs-importer') +const exporter = require('../src') const extend = require('merge-options') const { expect } = require('aegir/utils/chai') const spy = require('sinon/lib/sinon/spy') diff --git a/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js b/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js index b5c84d0d..17a19c17 100644 --- a/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js +++ b/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js @@ -3,10 +3,8 @@ const chunker = require('../src/chunker/fixed-size') const { expect } = require('aegir/utils/chai') -const isNode = require('detect-node') const all = require('it-all') -const loadFixture = require('aegir/fixtures') -const rawFile = loadFixture((isNode ? 
__dirname : 'test') + '/fixtures/1MiB.txt') +const rawFile = new Uint8Array(Math.pow(2, 20)) const uint8ArrayFromString = require('uint8arrays/from-string') const uint8ArrayConcat = require('uint8arrays/concat') diff --git a/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js b/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js index e0814e3b..3c61d7d8 100644 --- a/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js +++ b/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js @@ -3,13 +3,11 @@ const chunker = require('../src/chunker/rabin') const { expect } = require('aegir/utils/chai') -const loadFixture = require('aegir/fixtures') -const isNode = require('detect-node') const all = require('it-all') const uint8ArrayFromString = require('uint8arrays/from-string') const uint8ArrayConcat = require('uint8arrays/concat') -const rawFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1MiB.txt') +const rawFile = new Uint8Array(Math.pow(2, 20)).fill(1) describe('chunker: rabin', function () { this.timeout(30000) @@ -71,9 +69,9 @@ describe('chunker: rabin', function () { const file = uint8ArrayConcat([rawFile, uint8ArrayFromString('hello')]) const opts = { ...defaultOptions, - minChunkSize: KiB256 / 3, + minChunkSize: Math.round(KiB256 / 3), avgChunkSize: KiB256, - maxChunkSize: KiB256 + (KiB256 / 2) + maxChunkSize: Math.round(KiB256 + (KiB256 / 2)) } const chunks = await all(chunker([file], opts)) diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block deleted file mode 100644 index f57749f0..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 deleted file mode 100644 index a6e00f34..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 deleted file mode 100644 index f4c039c2..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 deleted file mode 100644 index 64ce0aeb..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 deleted file mode 100644 index c1f9899a..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 deleted file mode 100644 index cbd601a6..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file deleted file mode 100644 index e7229e0e..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file +++ /dev/null @@ -1 +0,0 @@ -L     \ No newline 
at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 deleted file mode 100644 index 36ff3333..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 deleted file mode 100644 index fa626274..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 deleted file mode 100644 index f7ea5c2e..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 deleted file mode 100644 index de99ffe5..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 deleted file mode 100644 index 0e438a15..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt deleted file mode 100644 index 60770c23..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block deleted file mode 100644 index a655cf83..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block +++ /dev/null @@ -1,5 +0,0 @@ - -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file deleted file mode 100644 index b93a6da8..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file +++ /dev/null @@ -1,4 +0,0 @@ -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block b/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block deleted file mode 100644 index ce734230..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block +++ /dev/null @@ -1,4 +0,0 @@ -4 -" si"¹W%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt deleted file mode 100644 index d95023c7..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt +++ /dev/null @@ -1,4 +0,0 @@ -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block 
b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block deleted file mode 100644 index 5accb645..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block +++ /dev/null @@ -1,3 +0,0 @@ -5 -" $G,A4{xZ/.D` 200Bytes.txt - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir deleted file mode 100644 index e19a122a..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt deleted file mode 100644 index d95023c7..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt +++ /dev/null @@ -1,4 +0,0 @@ -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-with-empty-files/empty-file.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-with-empty-files/empty-file.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/ipfs-unixfs-importer/test/fixtures/empty.txt b/packages/ipfs-unixfs-importer/test/fixtures/empty.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt deleted file mode 100644 index 6e306c55..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt deleted file mode 100644 index d95023c7..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt +++ /dev/null @@ -1,4 +0,0 @@ -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv b/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv deleted file mode 100644 index 55e83f48..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt deleted file mode 100644 index 6e306c55..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt deleted file mode 100644 index 6e306c55..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt deleted file mode 100644 index d95023c7..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt +++ /dev/null @@ -1,4 +0,0 @@ -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt deleted file mode 100644 
index 6e306c55..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt deleted file mode 100644 index d95023c7..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt +++ /dev/null @@ -1,4 +0,0 @@ -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/small.txt b/packages/ipfs-unixfs-importer/test/fixtures/small.txt deleted file mode 100644 index f81fce04..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/small.txt +++ /dev/null @@ -1 +0,0 @@ -this is a file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt deleted file mode 100644 index b3ab23d1..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt +++ /dev/null @@ -1,20361 +0,0 @@ -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG.
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0 b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0
deleted file mode 100644
index f9810363..00000000
--- a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0
+++ /dev/null
@@ -1,4728 +0,0 @@
-
-There have been many attempts at constructing a global
-distributed file system. Some systems have seen signifi-
-cant success, and others failed completely. Among the academic
-attempts, AFS [6] has succeeded widely and is still
-in use today. Others [7, ?] have not attained the same
-success. Outside of academia, the most successful systems
-have been peer-to-peer file-sharing applications primarily
-geared toward large media (audio and video). Most notably,
-Napster, KaZaA, and BitTorrent [2] deployed large
-file distribution systems supporting over 100 million simultaneous
-users. Even today, BitTorrent maintains a massive
-deployment where tens of millions of nodes churn daily [16].
-These applications saw greater numbers of users and files distributed
-than their academic file system counterparts. However,
-the applications were not designed as infrastructure to
-be built upon. While there have been successful repurposings1
-, no general file-system has emerged that offers global,
-low-latency, and decentralized distribution.
-Perhaps this is because a “good enough” system for most
-use cases already exists: HTTP. By far, HTTP is the most
-successful “distributed system of files” ever deployed. Coupled
-with the browser, HTTP has had enormous technical
-and social impact. It has become the de facto way to transmit
-files across the internet. Yet, it fails to take advantage
-of dozens of brilliant file distribution techniques invented in
-the last fifteen years. From one prespective, evolving Web
-infrastructure is near-impossible, given the number of backwards
-compatibility constraints and the number of strong
-1For example, Linux distributions use BitTorrent to transmit
-disk images, and Blizzard, Inc. uses it to distribute
-video game content.
-parties invested in the current model. But from another perspective,
-new protocols have emerged and gained wide use
-since the emergence of HTTP. What is lacking is upgrading
-design: enhancing the current HTTP web, and introducing
-new functionality without degrading user experience.
-Industry has gotten away with using HTTP this long because
-moving small files around is relatively cheap, even for
-small organizations with lots of traffic. But we are entering
-a new era of data distribution with new challenges: (a)
-hosting and distributing petabyte datasets, (b) computing
-on large data across organizations, (c) high-volume highdefinition
-on-demand or real-time media streams, (d) versioning
-and linking of massive datasets, (e) preventing accidental
-disappearance of important files, and more. Many
-of these can be boiled down to “lots of data, accessible everywhere.”
-Pressed by critical features and bandwidth concerns,
-we have already given up HTTP for different data
-distribution protocols. The next step is making them part
-of the Web itself.
-Orthogonal to efficient data distribution, version control
-systems have managed to develop important data collaboration
-workflows. Git, the distributed source code version
-control system, developed many useful ways to model and
-implement distributed data operations. The Git toolchain
-offers versatile versioning functionality that large file distribution
-systems severely lack. New solutions inspired by Git
-are emerging, such as Camlistore [?], a personal file storage
-system, and Dat [?] a data collaboration toolchain
-and dataset package manager. Git has already influenced
-distributed filesystem design [9], as its content addressed
-Merkle DAG data model enables powerful file distribution
-strategies. What remains to be explored is how this data
-structure can influence the design of high-throughput oriented
-file systems, and how it might upgrade the Web itself.
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled
-filesystem seeking to reconcile these issues. IPFS
-synthesizes learnings from many past successful systems.
-Careful interface-focused integration yields a system greater
-than the sum of its parts. The central IPFS principle is
-modeling all data as part of the same Merkle DAG.
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv b/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv
deleted file mode 100644
index 55e83f48..00000000
Binary files a/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/helpers/stream-to-array.js b/packages/ipfs-unixfs-importer/test/helpers/stream-to-array.js
deleted file mode 100644
index e69de29b..00000000