diff --git a/packages/ipfs-unixfs-exporter/package.json b/packages/ipfs-unixfs-exporter/package.json index 397f3105..3f15f688 100644 --- a/packages/ipfs-unixfs-exporter/package.json +++ b/packages/ipfs-unixfs-exporter/package.json @@ -45,6 +45,7 @@ "it-all": "^1.0.1", "it-buffer-stream": "^1.0.2", "it-first": "^1.0.1", + "merge-options": "^3.0.3", "multicodec": "^2.0.0", "nyc": "^15.0.0", "sinon": "^9.0.1", diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt b/packages/ipfs-unixfs-exporter/test/fixtures/1.2MiB.txt similarity index 100% rename from packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt rename to packages/ipfs-unixfs-exporter/test/fixtures/1.2MiB.txt diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt b/packages/ipfs-unixfs-exporter/test/fixtures/200Bytes.txt similarity index 100% rename from packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt rename to packages/ipfs-unixfs-exporter/test/fixtures/200Bytes.txt diff --git a/packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js b/packages/ipfs-unixfs-exporter/test/helpers/collect-leaf-cids.js similarity index 100% rename from packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js rename to packages/ipfs-unixfs-exporter/test/helpers/collect-leaf-cids.js diff --git a/packages/ipfs-unixfs-exporter/test/helpers/create-shard.js b/packages/ipfs-unixfs-exporter/test/helpers/create-shard.js deleted file mode 100644 index 836ac9e9..00000000 --- a/packages/ipfs-unixfs-exporter/test/helpers/create-shard.js +++ /dev/null @@ -1,33 +0,0 @@ -'use strict' - -const importer = require('ipfs-unixfs-importer') - -const SHARD_SPLIT_THRESHOLD = 10 - -const createShard = (numFiles, ipld) => { - return createShardWithFileNames(numFiles, (index) => `file-${index}`, ipld) -} - -const createShardWithFileNames = (numFiles, fileName, ipld) => { - const files = new Array(numFiles).fill(0).map((_, index) => ({ - path: fileName(index), - content: Uint8Array.from([0, 1, 2, 3, 4, index]) - })) - - return createShardWithFiles(files, ipld) -} - -const createShardWithFiles = async (files, ipld) => { - let last - - for await (const imported of importer(ipld, files, { - shardSplitThreshold: SHARD_SPLIT_THRESHOLD, - wrap: true - })) { - last = imported - } - - return last.cid -} - -module.exports = createShard diff --git a/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js b/packages/ipfs-unixfs-exporter/test/import-export-dir-sharding.spec.js similarity index 99% rename from packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js rename to packages/ipfs-unixfs-exporter/test/import-export-dir-sharding.spec.js index 32ae8641..bc5af3b9 100644 --- a/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js +++ b/packages/ipfs-unixfs-exporter/test/import-export-dir-sharding.spec.js @@ -1,8 +1,8 @@ /* eslint-env mocha */ 'use strict' -const importer = require('../src') -const exporter = require('ipfs-unixfs-exporter') +const importer = require('ipfs-unixfs-importer') +const exporter = require('../src') const { expect } = require('aegir/utils/chai') const IPLD = require('ipld') diff --git a/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js b/packages/ipfs-unixfs-exporter/test/import-export-nested-dir.spec.js similarity index 97% rename from packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js rename to packages/ipfs-unixfs-exporter/test/import-export-nested-dir.spec.js index 79d747f3..1a5e7831 100644 --- 
a/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js +++ b/packages/ipfs-unixfs-exporter/test/import-export-nested-dir.spec.js @@ -5,8 +5,8 @@ const { expect } = require('aegir/utils/chai') const IPLD = require('ipld') const inMemory = require('ipld-in-memory') const all = require('it-all') -const importer = require('../src') -const exporter = require('ipfs-unixfs-exporter') +const importer = require('ipfs-unixfs-importer') +const exporter = require('../src') const blockApi = require('./helpers/block') const uint8ArrayFromString = require('uint8arrays/from-string') const uint8ArrayToString = require('uint8arrays/to-string') diff --git a/packages/ipfs-unixfs-importer/test/import-export.spec.js b/packages/ipfs-unixfs-exporter/test/import-export.spec.js similarity index 93% rename from packages/ipfs-unixfs-importer/test/import-export.spec.js rename to packages/ipfs-unixfs-exporter/test/import-export.spec.js index 0d436b46..d529b85d 100644 --- a/packages/ipfs-unixfs-importer/test/import-export.spec.js +++ b/packages/ipfs-unixfs-exporter/test/import-export.spec.js @@ -10,8 +10,8 @@ const isNode = require('detect-node') const bigFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1.2MiB.txt') const blockApi = require('./helpers/block') -const importer = require('../src') -const exporter = require('ipfs-unixfs-exporter') +const importer = require('ipfs-unixfs-importer') +const exporter = require('../src') const strategies = [ 'flat', diff --git a/packages/ipfs-unixfs-importer/test/importer.spec.js b/packages/ipfs-unixfs-exporter/test/importer.spec.js similarity index 99% rename from packages/ipfs-unixfs-importer/test/importer.spec.js rename to packages/ipfs-unixfs-exporter/test/importer.spec.js index 33696cb1..b67a954b 100644 --- a/packages/ipfs-unixfs-importer/test/importer.spec.js +++ b/packages/ipfs-unixfs-exporter/test/importer.spec.js @@ -1,8 +1,8 @@ /* eslint-env mocha */ 'use strict' -const importer = require('../src') -const exporter = require('ipfs-unixfs-exporter') +const importer = require('ipfs-unixfs-importer') +const exporter = require('../src') const extend = require('merge-options') const { expect } = require('aegir/utils/chai') const spy = require('sinon/lib/sinon/spy') diff --git a/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js b/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js index b5c84d0d..17a19c17 100644 --- a/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js +++ b/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js @@ -3,10 +3,8 @@ const chunker = require('../src/chunker/fixed-size') const { expect } = require('aegir/utils/chai') -const isNode = require('detect-node') const all = require('it-all') -const loadFixture = require('aegir/fixtures') -const rawFile = loadFixture((isNode ? 
__dirname : 'test') + '/fixtures/1MiB.txt') +const rawFile = new Uint8Array(Math.pow(2, 20)) const uint8ArrayFromString = require('uint8arrays/from-string') const uint8ArrayConcat = require('uint8arrays/concat') diff --git a/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js b/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js index e0814e3b..3c61d7d8 100644 --- a/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js +++ b/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js @@ -3,13 +3,11 @@ const chunker = require('../src/chunker/rabin') const { expect } = require('aegir/utils/chai') -const loadFixture = require('aegir/fixtures') -const isNode = require('detect-node') const all = require('it-all') const uint8ArrayFromString = require('uint8arrays/from-string') const uint8ArrayConcat = require('uint8arrays/concat') -const rawFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1MiB.txt') +const rawFile = new Uint8Array(Math.pow(2, 20)).fill(1) describe('chunker: rabin', function () { this.timeout(30000) @@ -71,9 +69,9 @@ describe('chunker: rabin', function () { const file = uint8ArrayConcat([rawFile, uint8ArrayFromString('hello')]) const opts = { ...defaultOptions, - minChunkSize: KiB256 / 3, + minChunkSize: Math.round(KiB256 / 3), avgChunkSize: KiB256, - maxChunkSize: KiB256 + (KiB256 / 2) + maxChunkSize: Math.round(KiB256 + (KiB256 / 2)) } const chunks = await all(chunker([file], opts)) diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block deleted file mode 100644 index f57749f0..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 deleted file mode 100644 index a6e00f34..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 deleted file mode 100644 index f4c039c2..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 deleted file mode 100644 index 64ce0aeb..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 deleted file mode 100644 index c1f9899a..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 deleted file mode 100644 index cbd601a6..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file deleted file mode 100644 index e7229e0e..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file +++ /dev/null @@ -1 +0,0 @@ -L     \ No newline 
at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 deleted file mode 100644 index 36ff3333..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 deleted file mode 100644 index fa626274..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 deleted file mode 100644 index f7ea5c2e..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 deleted file mode 100644 index de99ffe5..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 deleted file mode 100644 index 0e438a15..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt deleted file mode 100644 index 60770c23..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block deleted file mode 100644 index a655cf83..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block +++ /dev/null @@ -1,5 +0,0 @@ - -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file deleted file mode 100644 index b93a6da8..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file +++ /dev/null @@ -1,4 +0,0 @@ -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block b/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block deleted file mode 100644 index ce734230..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block +++ /dev/null @@ -1,4 +0,0 @@ -4 -" si"¹W%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt deleted file mode 100644 index d95023c7..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt +++ /dev/null @@ -1,4 +0,0 @@ -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block 
b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block deleted file mode 100644 index 5accb645..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block +++ /dev/null @@ -1,3 +0,0 @@ -5 -" $G,A4{xZ/.D` 200Bytes.txt - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir deleted file mode 100644 index e19a122a..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt deleted file mode 100644 index d95023c7..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt +++ /dev/null @@ -1,4 +0,0 @@ -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-with-empty-files/empty-file.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-with-empty-files/empty-file.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/ipfs-unixfs-importer/test/fixtures/empty.txt b/packages/ipfs-unixfs-importer/test/fixtures/empty.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt deleted file mode 100644 index 6e306c55..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt deleted file mode 100644 index d95023c7..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt +++ /dev/null @@ -1,4 +0,0 @@ -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv b/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv deleted file mode 100644 index 55e83f48..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt deleted file mode 100644 index 6e306c55..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt deleted file mode 100644 index 6e306c55..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt deleted file mode 100644 index d95023c7..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt +++ /dev/null @@ -1,4 +0,0 @@ -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt deleted file mode 100644 
index 6e306c55..00000000 Binary files a/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt deleted file mode 100644 index d95023c7..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt +++ /dev/null @@ -1,4 +0,0 @@ -wxxM{ -DzH/&^ RS/v,R -=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s -컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/small.txt b/packages/ipfs-unixfs-importer/test/fixtures/small.txt deleted file mode 100644 index f81fce04..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/small.txt +++ /dev/null @@ -1 +0,0 @@ -this is a file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt deleted file mode 100644 index b3ab23d1..00000000 --- a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt +++ /dev/null @@ -1,20361 +0,0 @@ -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG.
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0 b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0
deleted file mode 100644
index f9810363..00000000
--- a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0
+++ /dev/null
@@ -1,4728 +0,0 @@
-
-There have been many attempts at constructing a global
-distributed file system. Some systems have seen signifi-
-cant success, and others failed completely. Among the academic
-attempts, AFS [6] has succeeded widely and is still
-in use today. Others [7, ?] have not attained the same
-success. Outside of academia, the most successful systems
-have been peer-to-peer file-sharing applications primarily
-geared toward large media (audio and video). Most notably,
-Napster, KaZaA, and BitTorrent [2] deployed large
-file distribution systems supporting over 100 million simultaneous
-users. Even today, BitTorrent maintains a massive
-deployment where tens of millions of nodes churn daily [16].
-These applications saw greater numbers of users and files distributed
-than their academic file system counterparts. However,
-the applications were not designed as infrastructure to
-be built upon. While there have been successful repurposings1
-, no general file-system has emerged that offers global,
-low-latency, and decentralized distribution.
-Perhaps this is because a “good enough” system for most
-use cases already exists: HTTP. By far, HTTP is the most
-successful “distributed system of files” ever deployed. Coupled
-with the browser, HTTP has had enormous technical
-and social impact. It has become the de facto way to transmit
-files across the internet. Yet, it fails to take advantage
-of dozens of brilliant file distribution techniques invented in
-the last fifteen years. From one prespective, evolving Web
-infrastructure is near-impossible, given the number of backwards
-compatibility constraints and the number of strong
-1For example, Linux distributions use BitTorrent to transmit
-disk images, and Blizzard, Inc. uses it to distribute
-video game content.
-parties invested in the current model. But from another perspective,
-new protocols have emerged and gained wide use
-since the emergence of HTTP. What is lacking is upgrading
-design: enhancing the current HTTP web, and introducing
-new functionality without degrading user experience.
-Industry has gotten away with using HTTP this long because
-moving small files around is relatively cheap, even for
-small organizations with lots of traffic. But we are entering
-a new era of data distribution with new challenges: (a)
-hosting and distributing petabyte datasets, (b) computing
-on large data across organizations, (c) high-volume highdefinition
-on-demand or real-time media streams, (d) versioning
-and linking of massive datasets, (e) preventing accidental
-disappearance of important files, and more. Many
-of these can be boiled down to “lots of data, accessible everywhere.”
-Pressed by critical features and bandwidth concerns,
-we have already given up HTTP for different data
-distribution protocols. The next step is making them part
-of the Web itself.
-Orthogonal to efficient data distribution, version control
-systems have managed to develop important data collaboration
-workflows. Git, the distributed source code version
-control system, developed many useful ways to model and
-implement distributed data operations. The Git toolchain
-offers versatile versioning functionality that large file distribution
-systems severely lack. New solutions inspired by Git
-are emerging, such as Camlistore [?], a personal file storage
-system, and Dat [?] a data collaboration toolchain
-and dataset package manager. Git has already influenced
-distributed filesystem design [9], as its content addressed
-Merkle DAG data model enables powerful file distribution
-strategies. What remains to be explored is how this data
-structure can influence the design of high-throughput oriented
-file systems, and how it might upgrade the Web itself.
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled
-filesystem seeking to reconcile these issues. IPFS
-synthesizes learnings from many past successful systems.
-Careful interface-focused integration yields a system greater
-than the sum of its parts. The central IPFS principle is
-modeling all data as part of the same Merkle DAG.
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.” -Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough” system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files” ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv b/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv
deleted file mode 100644
index 55e83f48..00000000
Binary files a/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/helpers/stream-to-array.js b/packages/ipfs-unixfs-importer/test/helpers/stream-to-array.js
deleted file mode 100644
index e69de29b..00000000